llama.cpp/examples/server/public/completion.js

/**
 * Baseline request parameters for the /completion endpoint. Anything the
 * caller passes to llamaComplete is spread on top of these, so every key here
 * can be overridden per request.
 */
const paramDefaults = {
  // ask the server to stream the answer
  stream: true,
  // requested prediction count (sent as-is to the server)
  n_predict: 500,
  // sampling temperature
  temperature: 0.2,
  // strings that end the generation
  stop: ["</s>"],
};

/**
 * Parses one server-sent-event block (the lines between two blank lines) into
 * an object of its `field: value` pairs. Lines that do not look like
 * `field: value` are ignored; if a field repeats, the last value wins.
 * @param {string} block - raw text of a single event.
 * @returns {object} the event's fields, all values still strings.
 */
const parseSseFields = (block) => {
  const fields = {};
  for (const [, key, value] of block.matchAll(/^(\S+):\s(.*)$/gm)) {
    fields[key] = value;
  }
  return fields;
};

/**
 * Requests a completion from the server's /completion endpoint and streams the
 * answer back as server-sent events.
 *
 * The network stream is buffered and cut on blank lines, so it does not matter
 * how the events are distributed over the chunks delivered by the reader: several
 * events in one chunk, or one event spread over several chunks, are both handled.
 *
 * @param {object} params - The parameters for the completion request; merged over paramDefaults.
 * @param {object} controller - an instance of AbortController if you need one, or null.
 *   It is always aborted when this function finishes, to make sure the connection is closed.
 * @param {function} callback - Called once per received event with an object holding the
 *   event's SSE fields, where `data` is the already JSON-decoded payload. Return exactly
 *   `false` to stop reading; any other return value continues.
 * @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.
 * @throws {Error} if the server answers with a non-2xx status.
 * @throws {SyntaxError} if an event's data is not valid JSON.
 */
export const llamaComplete = async (params, controller, callback) => {
  const abortController = controller || new AbortController();
  const completionParams = { ...paramDefaults, ...params };

  // we use fetch directly here because the built in fetchEventSource does not support POST
  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    signal: abortController.signal,
  });

  let content = "";

  // Handles one complete event. Returns false once the callback or the server
  // asked us to stop, true if reading should go on.
  const dispatch = (block) => {
    const event = parseSseFields(block);
    if (event.data === undefined) {
      // comment / keep-alive / empty block: nothing to decode
      return true;
    }

    // since we know this is llama.cpp, let's just decode the json in data
    event.data = JSON.parse(event.data);
    content += event.data.content ?? "";

    // only an explicit `false` stops the stream; a callback that happens to
    // return "" or 0 must not end it by accident
    const keepGoing = callback ? callback(event) !== false : true;

    // if we got a stop token from server, we are finished as well
    return keepGoing && !event.data.stop;
  };

  try {
    if (!response.ok) {
      throw new Error(`llama completion request failed: ${response.status} ${response.statusText}`);
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();

    // text received so far that does not yet form a complete event
    let pending = "";
    let streaming = true;

    while (streaming) {
      const chunk = await reader.read();

      // `stream: true` keeps a multi-byte character that is split across two
      // chunks intact; the argument-less call at the end flushes the decoder
      pending += chunk.done
        ? decoder.decode()
        : decoder.decode(chunk.value, { stream: true });

      // events are terminated by a blank line; the last piece is still
      // incomplete unless the stream has ended
      const blocks = pending.split(/\r?\n\r?\n/);
      pending = chunk.done ? "" : blocks.pop();

      for (const block of blocks) {
        if (!dispatch(block)) {
          streaming = false;
          break;
        }
      }

      if (chunk.done) {
        streaming = false;
      }
    }
  } catch (e) {
    console.error("llama error: ", e);
    throw e;
  }
  finally {
    // closes the connection, also when we stopped before the server was done
    abortController.abort();
  }

  return content;
}
Simple webchat for server (#1998) * expose simple web interface on root domain * embed index and add --path for choosing static dir * allow server to multithread because web browsers send a lot of garbage requests we want the server to multithread when serving 404s for favicon's etc. To avoid blowing up llama we just take a mutex when it's invoked. * let's try this with the xxd tool instead and see if msvc is happier with that * enable server in Makefiles * add /completion.js file to make it easy to use the server from js * slightly nicer css * rework state management into session, expose historyTemplate to settings --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2023-07-04 16:05:27 +02:00			`const paramDefaults = {`
			`stream: true,`
			`n_predict: 500,`
			`temperature: 0.2,`
			`stop: ["</s>"]`
			`};`

			`/**`
			`* This function completes the input text using a llama dictionary.`
			`* @param {object} params - The parameters for the completion request.`
			`* @param {object} controller - an instance of AbortController if you need one, or null.`
			`* @param {function} callback - The callback function to call when the completion is done.`
			`* @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.`
			`*/`
			`export const llamaComplete = async (params, controller, callback) => {`
			`if (!controller) {`
			`controller = new AbortController();`
			`}`
			`const completionParams = { ...paramDefaults, ...params };`

			`// we use fetch directly here becasue the built in fetchEventSource does not support POST`
			`const response = await fetch("/completion", {`
			`method: 'POST',`
			`body: JSON.stringify(completionParams),`
			`headers: {`
			`'Connection': 'keep-alive',`
			`'Content-Type': 'application/json',`
			`'Accept': 'text/event-stream'`
			`},`
			`signal: controller.signal,`
			`});`

			`const reader = response.body.getReader();`
			`const decoder = new TextDecoder();`

			`let content = "";`

			`try {`

			`let cont = true;`

			`while (cont) {`
			`const result = await reader.read();`
			`if (result.done) {`
			`break;`
			`}`

			`// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we`
			`// mainly care about the data: key here, which we expect as json`
			`const text = decoder.decode(result.value);`

			`// parse all sse events and add them to result`
			`const regex = /^(\S+):\s(.*)$/gm;`
			`for (const match of text.matchAll(regex)) {`
			`result[match[1]] = match[2]`
			`}`

			`// since we know this is llama.cpp, let's just decode the json in data`
			`result.data = JSON.parse(result.data);`
			`content += result.data.content;`

			`// callack`
			`if (callback) {`
			`cont = callback(result) != false;`
			`}`

			`// if we got a stop token from server, we will break here`
			`if (result.data.stop) {`
			`break;`
			`}`
			`}`
			`} catch (e) {`
			`console.error("llama error: ", e);`
			`throw e;`
			`}`
			`finally {`
			`controller.abort();`
			`}`

			`return content;`
			`}`