mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-06 02:48:57 +01:00
remove need for @microsoft/fetch-event-source dep (-7kb)
This commit is contained in:
parent
e192f950a3
commit
34fc3c7e9f
@ -5,7 +5,7 @@
|
|||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
PUBLIC=$DIR/public
|
PUBLIC=$DIR/public
|
||||||
|
|
||||||
curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks,@microsoft/fetch-event-source > $PUBLIC/index.js
|
curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks > $PUBLIC/index.js
|
||||||
echo >> $PUBLIC/index.js # add newline
|
echo >> $PUBLIC/index.js # add newline
|
||||||
|
|
||||||
echo "// Generated file, run deps.sh to update. Do not edit directly
|
echo "// Generated file, run deps.sh to update. Do not edit directly
|
||||||
|
@ -66,7 +66,7 @@
|
|||||||
|
|
||||||
<script type="module">
|
<script type="module">
|
||||||
import {
|
import {
|
||||||
html, h, signal, effect, computed, render, useSignal, useEffect, useRef, fetchEventSource
|
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
|
||||||
} from '/index.js';
|
} from '/index.js';
|
||||||
|
|
||||||
const transcript = signal([])
|
const transcript = signal([])
|
||||||
@ -93,50 +93,101 @@
|
|||||||
return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(params[key]));
|
return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(params[key]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const llamaCompletionStream = async (params) => {
|
||||||
|
controller.value = new AbortController();
|
||||||
|
const sig = controller.value.signal;
|
||||||
|
|
||||||
|
const data = JSON.stringify({
|
||||||
|
stream: true,
|
||||||
|
prompt: params.prompt,
|
||||||
|
n_predict: parseInt(nPredict.value),
|
||||||
|
temperature: parseFloat(temperature.value),
|
||||||
|
stop: ["</s>", template("{{bot}}:"), template("{{user}}:")]
|
||||||
|
});
|
||||||
|
|
||||||
|
// we use fetch directly here because the built-in EventSource does not support POST
|
||||||
|
const response = await fetch("/completion", {
|
||||||
|
method: 'POST',
|
||||||
|
body: data,
|
||||||
|
headers: {
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Accept': 'text/event-stream'
|
||||||
|
},
|
||||||
|
signal: sig,
|
||||||
|
});
|
||||||
|
|
||||||
|
const reader = response.body.getReader();
|
||||||
|
const decoder = new TextDecoder();
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const result = await reader.read();
|
||||||
|
if (result.done) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// sse answers in the form of multiple lines of: key: value\n with data always present as a key. in our case we
|
||||||
|
// mainly care about the data: key here, which we expect as json
|
||||||
|
const text = decoder.decode(result.value);
|
||||||
|
|
||||||
|
// parse all sse events and add them to result
|
||||||
|
const regex = /^(\S+):\s(.*)$/gm;
|
||||||
|
for (const match of text.matchAll(regex)) {
|
||||||
|
result[match[1]] = match[2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// since we know this is llama.cpp, let's just decode the json in data
|
||||||
|
result.data = JSON.parse(result.data);
|
||||||
|
|
||||||
|
// callback
|
||||||
|
params.onmessage(result);
|
||||||
|
|
||||||
|
// if we got a stop token from server, we will break here
|
||||||
|
if (result.data.stop) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
controller.value.abort();
|
||||||
|
controller.value = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// send message to server
|
// send message to server
|
||||||
const chat = async (msg) => {
|
const chat = async (msg) => {
|
||||||
if (controller.value) {
|
if (controller.value) {
|
||||||
console.log('already running...');
|
console.log('already running...');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
controller.value = new AbortController();
|
transcript.value = [...transcript.value, ['{{user}}', msg]];
|
||||||
|
|
||||||
const history = [...transcript.value, ['{{user}}', msg]];
|
const payload = template(chatTemplate.value, {
|
||||||
transcript.value = history;
|
|
||||||
|
|
||||||
let additionalParams = {
|
|
||||||
message: msg,
|
message: msg,
|
||||||
history: history.flatMap(([name, msg]) => `${name}: ${msg}`).join("\n"),
|
history: transcript.value.flatMap(([name, msg]) => `${name}: ${msg}`).join("\n"),
|
||||||
}
|
});
|
||||||
|
|
||||||
const payload = template(chatTemplate.value, additionalParams)
|
let currentMessage = '';
|
||||||
|
let history = transcript.value;
|
||||||
|
|
||||||
let currentMessage = "";
|
llamaCompletionStream({
|
||||||
await fetchEventSource('/completion', {
|
prompt: payload,
|
||||||
method: 'POST',
|
onmessage: (message) => {
|
||||||
signal: controller.value.signal,
|
const data = message.data;
|
||||||
body: JSON.stringify({
|
|
||||||
stream: true,
|
|
||||||
prompt: payload,
|
|
||||||
n_predict: parseInt(nPredict.value),
|
|
||||||
temperature: parseFloat(temperature.value),
|
|
||||||
stop: ["</s>", template("{{bot}}:"), template("{{user}}:")]
|
|
||||||
}),
|
|
||||||
onmessage(e) {
|
|
||||||
const data = JSON.parse(e.data);
|
|
||||||
currentMessage += data.content;
|
currentMessage += data.content;
|
||||||
|
|
||||||
|
transcript.value = [...history,["{{bot}}", currentMessage]];
|
||||||
|
|
||||||
if (data.stop) {
|
if (data.stop) {
|
||||||
console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
|
console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
transcript.value = [...history, ['{{bot}}', currentMessage]]
|
|
||||||
return true;
|
|
||||||
},
|
|
||||||
onclose(e) {
|
|
||||||
controller.value = null;
|
|
||||||
return false;
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user