// mirror of https://github.com/ggerganov/llama.cpp.git
// synced 2024-10-31 23:28:51 +01:00
// commit 31cfbb1013: use javascript generators as much cleaner API; add ways to
// access completion as promise and EventSource; export llama_timings as struct
// and expose them in server; uniform variable names + struct init.
// Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
// Default completion parameters; caller-supplied params are merged over these.
const paramDefaults = {
  stream: true,        // ask the server for an SSE stream rather than one response
  n_predict: 500,      // maximum number of tokens to generate
  temperature: 0.2,
  stop: ["</s>"],      // default stop sequence
};

// Cached generation settings: populated from the final streamed chunk in
// llama(), or fetched lazily by llamaModelInfo().
let generation_settings = null;
// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
// @param {string} prompt  text to complete
// @param {object} params  per-request overrides merged over paramDefaults
// @param {object} config  optional { controller: AbortController } to cancel the request
// @returns {string} the accumulated completion text (generator return value)
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";

  // tail of an SSE line that was split across two reads; carried to the next read
  let leftover = "";

  try {
    while (true) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
      // mainly care about the data: key here, which we expect as json.
      // {stream: true} keeps multi-byte UTF-8 characters that straddle two
      // reads intact instead of decoding them as replacement characters.
      const raw = leftover + decoder.decode(result.value, { stream: true });

      // only parse up to the last complete line; an event may be split
      // across reads and JSON.parse would choke on a truncated payload
      const lastNewline = raw.lastIndexOf('\n');
      if (lastNewline === -1) {
        leftover = raw;
        continue;
      }
      const text = raw.slice(0, lastNewline + 1);
      leftover = raw.slice(lastNewline + 1);

      // parse all sse events and add them to result
      const regex = /^(\S+):\s(.*)$/gm;
      for (const match of text.matchAll(regex)) {
        result[match[1]] = match[2]
      }

      // a read may carry no data: line (e.g. a keep-alive comment);
      // wait for more bytes instead of crashing on JSON.parse(undefined)
      if (result.data === undefined) {
        continue;
      }

      // since we know this is llama.cpp, let's just decode the json in data
      result.data = JSON.parse(result.data);
      content += result.data.content;

      // yield
      yield result;

      // if we got a stop token from server, we will break here
      if (result.data.stop) {
        if (result.data.generation_settings) {
          generation_settings = result.data.generation_settings;
        }
        break;
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    // always release the connection, whether we stopped, errored or finished
    controller.abort();
  }

  return content;
}
// Call llama, return an event target that you can subcribe to
|
|
//
|
|
// Example:
|
|
//
|
|
// import { llamaEventTarget } from '/completion.js'
|
|
//
|
|
// const conn = llamaEventTarget(prompt)
|
|
// conn.addEventListener("message", (chunk) => {
|
|
// document.write(chunk.detail.content)
|
|
// })
|
|
//
|
|
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
|
|
const eventTarget = new EventTarget();
|
|
(async () => {
|
|
let content = "";
|
|
for await (const chunk of llama(prompt, params, config)) {
|
|
if (chunk.data) {
|
|
content += chunk.data.content;
|
|
eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
|
|
}
|
|
if (chunk.data.generation_settings) {
|
|
eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
|
|
}
|
|
if (chunk.data.timings) {
|
|
eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
|
|
}
|
|
}
|
|
eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
|
|
})();
|
|
return eventTarget;
|
|
}
|
|
|
|
// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//     llamaPromise(prompt).then((content) => {
//       document.write(content)
//     })
//
//     or
//
//     const content = await llamaPromise(prompt)
//     document.write(content)
//
// @param {string} prompt  text to complete
// @param {object} params  per-request overrides, forwarded to llama()
// @param {object} config  optional { controller: AbortController }
// @returns {Promise<string>} the full completion text; rejects on stream error
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  // an async function already returns a Promise; the original wrapped this
  // in `new Promise(async (resolve, reject) => ...)`, the explicit-construction
  // anti-pattern, which adds nothing and risks unhandled executor rejections
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
/**
 * (deprecated) Stream a completion and hand every chunk to `callback`.
 * Kept for backwards compatibility; prefer iterating llama() directly.
 */
export const llamaComplete = async (params, controller, callback) => {
  const stream = llama(params.prompt, params, { controller });
  for await (const chunk of stream) {
    callback(chunk);
  }
}
// Get the model info from the server. This is useful for getting the context window and so on.
// The result is fetched once and then served from the module-level cache.
export const llamaModelInfo = async () => {
  if (!generation_settings) {
    const response = await fetch("/model.json");
    generation_settings = await response.json();
  }
  return generation_settings;
}