diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js
index 6cb1bf023..2841c0aba 100644
--- a/examples/server/public/completion.js
+++ b/examples/server/public/completion.js
@@ -7,20 +7,27 @@ const paramDefaults = {
 
 let generation_settings = null;
 
 
-/**
- * This function completes the input text using a llama dictionary.
- * @param {object} params - The parameters for the completion request.
- * @param {object} controller - an instance of AbortController if you need one, or null.
- * @param {function} callback - The callback function to call when the completion is done.
- * @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.
- */
-export const llamaComplete = async (params, controller, callback) => {
-  if (!controller) {
-    controller = new AbortController();
-  }
-  const completionParams = { ...paramDefaults, ...params };
-  // we use fetch directly here becasue the built in fetchEventSource does not support POST
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+//    import { llama } from '/completion.js'
+//
+//    const request = llama("Tell me a joke", {n_predict: 800})
+//    for await (const chunk of request) {
+//      document.write(chunk.data.content)
+//    }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+  let controller = config.controller;
+
+  if (!controller) {
+    controller = new AbortController();
+  }
+
+  const completionParams = { ...paramDefaults, ...params, prompt };
+
   const response = await fetch("/completion", {
     method: 'POST',
     body: JSON.stringify(completionParams),
@@ -38,7 +45,6 @@ export const llamaComplete = async (params, controller, callback) => {
   let content = "";
 
   try {
-
     let cont = true;
 
     while (cont) {
@@ -61,10 +67,8 @@ export const llamaComplete = async (params, controller, callback) => {
       result.data = JSON.parse(result.data);
       content += result.data.content;
 
-      // callack
-      if (callback) {
-        cont = callback(result) != false;
-      }
+      // yield
+      yield result;
 
       // if we got a stop token from server, we will break here
      if (result.data.stop) {
@@ -75,7 +79,9 @@ export const llamaComplete = async (params, controller, callback) => {
       }
     }
   } catch (e) {
-    console.error("llama error: ", e);
+    if (e.name !== 'AbortError') {
+      console.error("llama error: ", e);
+    }
     throw e;
   }
   finally {
@@ -85,10 +91,78 @@ export const llamaComplete = async (params, controller, callback) => {
   return content;
 }
 
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+//    import { llamaEventTarget } from '/completion.js'
+//
+//    const conn = llamaEventTarget(prompt)
+//    conn.addEventListener("message", (chunk) => {
+//      document.write(chunk.detail.content)
+//    })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+  const eventTarget = new EventTarget();
+  (async () => {
+    let content = "";
+    for await (const chunk of llama(prompt, params, config)) {
+      if (chunk.data) {
+        content += chunk.data.content;
+        eventTarget.dispatchEvent(new CustomEvent("message", {detail: chunk.data}));
+      }
+      if(chunk.data.generation_settings) {
+        eventTarget.dispatchEvent(new CustomEvent("generation_settings", {detail: chunk.data.generation_settings}));
+      }
+      if(chunk.data.timings) {
+        eventTarget.dispatchEvent(new CustomEvent("timings", {detail: chunk.data.timings}));
+      }
+    }
+    eventTarget.dispatchEvent(new CustomEvent("done", {detail: {content}}));
+  })();
+  return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+//    llamaPromise(prompt).then((content) => {
+//      document.write(content)
+//    })
+//
+//    or
+//
+//    const content = await llamaPromise(prompt)
+//    document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+  return new Promise(async (resolve, reject) => {
+    let content = "";
+    try {
+      for await (const chunk of llama(prompt, params, config)) {
+        content += chunk.data.content;
+      }
+      resolve(content);
+    } catch (error) {
+      reject(error);
+    }
+  });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+  for await (const chunk of llama(params.prompt, params, {controller})) {
+    callback(chunk);
+  }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
 export const llamaModelInfo = async () => {
   if (!generation_settings) {
     generation_settings = await fetch("/model.json").then(r => r.json());
   }
   return generation_settings;
 }
-
diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index e65f468fb..8ace0b0af 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -110,7 +110,7 @@
       html, h, signal, effect, computed, render, useSignal, useEffect, useRef
     } from '/index.js';
 
-    import { llamaComplete } from '/completion.js';
+    import { llama } from '/completion.js';
 
     const session = signal({
       prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
@@ -163,7 +163,7 @@
 
      transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 
-      const payload = template(session.value.template, {
+      const prompt = template(session.value.template, {
        message: msg,
        history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
      });
@@ -173,13 +173,13 @@
 
      const llamaParams = {
        ...params.value,
-        prompt: payload,
        stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
      }
 
-      await llamaComplete(llamaParams, controller.value, (message) => {
-        const data = message.data;
+      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
+        const data = chunk.data;
 
        currentMessage += data.content;
+        // remove leading whitespace
        currentMessage = currentMessage.replace(/^\s+/, "")
 
@@ -192,7 +192,7 @@
        if (data.timings) {
          llamaStats.value = data.timings;
        }
-      })
+      }
 
      controller.value = null;
    }
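
For reference, a short usage sketch of the generator API introduced above; it is not part of the patch. It assumes the page is served by the example server (so '/completion.js' resolves), and the 'output' element id, prompt, and 10-second timeout are hypothetical, chosen only to show how config.controller and the AbortError handling added in this change fit together.

// Usage sketch (illustrative, not part of the patch): stream a completion
// into the page and cancel it by aborting the underlying fetch.
import { llama } from '/completion.js';

const controller = new AbortController();
setTimeout(() => controller.abort(), 10000); // hypothetical 10s cutoff

const output = document.getElementById('output'); // hypothetical element

(async () => {
  try {
    for await (const chunk of llama("Tell me a joke", { n_predict: 256 }, { controller })) {
      output.textContent += chunk.data.content; // chunk.data is the parsed SSE payload
    }
  } catch (e) {
    // With this patch, llama() no longer logs AbortError, but it still rethrows it,
    // so an expected cancellation is swallowed here.
    if (e.name !== 'AbortError') throw e;
  }
})();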