mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-22 17:49:10 +01:00
328 lines
8.5 KiB
HTML
328 lines
8.5 KiB
HTML
<html>
|
|
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>llama.cpp - chat</title>
|
|
|
|
<style>
  /* Page wrapper: centred, with generous vertical margin. */
  #container {
    max-width: 80rem;
    margin: 4em auto;
  }

  /* Bordered panel holding the chat/config area and the input form. */
  main {
    border: 1px solid #ddd;
    padding: 1em;
  }

  /* Chat log: fixed height, scrolls vertically when the log overflows. */
  #chat {
    height: 50vh;
    overflow-y: auto;
  }

  body {
    max-width: 650px;
    line-height: 1.2;
    font-size: 16px;
    margin: 0 auto;
  }

  /* Paragraphs: break long words instead of overflowing the panel. */
  p {
    overflow-wrap: break-word;
    word-wrap: break-word; /* legacy alias of overflow-wrap */
    hyphens: auto;
    margin-top: 0.5em;
    margin-bottom: 0.5em;
  }

  /* Forms lay their controls out in a single row. */
  form {
    margin: 1em 0 0 0;
    display: flex;
    gap: 0.5em;
    flex-direction: row;
    align-items: center;
  }

  form > * {
    padding: 4px;
  }

  /* Text inputs take up the remaining width of the row. */
  form input {
    flex-grow: 1;
  }

  fieldset {
    width: 100%;
    padding: 1em;
  }

  fieldset label {
    margin: 0.5em 0;
    display: block;
  }
</style>
|
|
|
|
|
|
<script type="module">
|
|
import {
|
|
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
|
|
} from '/index.js';
|
|
|
|
// Chat transcript: a list of [speaker, text] pairs. The speaker is stored as
// a template placeholder ("{{user}}" or "{{bot}}") and expanded when rendered.
const transcript = signal([]);
// True once the transcript holds at least one entry.
const chatStarted = computed(() => transcript.value.length > 0);

// Layout of the prompt sent to the server; {{...}} placeholders are expanded
// by template() before the request is made.
const chatTemplate = signal("{{prompt}}\n\n{{history}}\n{{bot}}:");
// Values substituted for the placeholders of the same name.
const settings = signal({
  prompt: "This is a conversation between user and llama, a friendly chatbot.",
  bot: "llama",
  user: "User"
});

// Parameters sent along with every completion request.
const temperature = signal(0.2);
const nPredict = signal(80);
// AbortController of the in-flight completion request, or null when idle.
const controller = signal(null);
// NOTE: despite its name, this is true while NO request is in flight
// (controller is null). The Send/Stop buttons rely on this polarity, so it
// must only be changed together with them.
const generating = computed(() => controller.value == null);
|
|
|
|
// Simple template replace: expands every {{key}} placeholder found in `str`.
//
// str - value to expand; it is converted to a string first.
// map - optional object whose entries extend/override settings.value for the
//       top-level expansion only.
//
// Returns the expanded string. Replacement values are themselves expanded,
// but only against settings.value (the map is not passed down). A placeholder
// with no matching value is left in the output verbatim instead of being
// rendered as the text "undefined".
const template = (str, map) => {
  const params = map ? { ...settings.value, ...map } : settings.value;

  return String(str).replaceAll(/\{\{(.*?)\}\}/g, (placeholder, key) => {
    // own-property check so that keys such as "constructor" are not resolved
    // through the prototype chain
    const known = Object.prototype.hasOwnProperty.call(params, key);
    const value = known ? params[key] : undefined;
    return value == null ? placeholder : template(value);
  });
};
|
|
|
|
// Parse one server-sent event (a group of "field: value" lines) into a plain
// object whose `data` field is decoded as JSON. Returns null when the group
// has no data field.
const parseSseEvent = (rawEvent) => {
  const event = {};
  for (const [, field, value] of rawEvent.matchAll(/^(\S+):\s(.*)$/gm)) {
    event[field] = value;
  }
  if (event.data === undefined) {
    return null;
  }
  // since we know this is llama.cpp, let's just decode the json in data
  event.data = JSON.parse(event.data);
  return event;
};

// POST a streaming completion request to /completion and feed every received
// event to params.onmessage.
//
// params.prompt    - prompt text sent to the server.
// params.onmessage - callback invoked once per event with an object whose
//                    `data` property is the decoded JSON payload.
//
// The request's AbortController is published in `controller` while the
// request runs. The returned promise resolves when the stream ends, when the
// server reports `stop`, or when the request is aborted; it rejects on
// network, HTTP or parse errors.
const llamaCompletionStream = async (params) => {
  // Keep a local reference: `controller` may be cleared (Stop button) or
  // replaced (a newer request) while this request is still unwinding.
  const abortController = new AbortController();
  controller.value = abortController;

  const data = JSON.stringify({
    stream: true,
    prompt: params.prompt,
    n_predict: Number.parseInt(nPredict.value, 10),
    temperature: Number.parseFloat(temperature.value),
    stop: ["</s>", template("{{bot}}:"), template("{{user}}:")]
  });

  try {
    // we use fetch directly here because the built in fetchEventSource does not support POST
    const response = await fetch("/completion", {
      method: 'POST',
      body: data,
      headers: {
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Accept': 'text/event-stream'
      },
      signal: abortController.signal,
    });

    if (!response.ok) {
      throw new Error(`completion request failed: ${response.status} ${response.statusText}`);
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    // text received so far that does not yet form a complete event
    let pending = '';
    let stopped = false;

    while (!stopped) {
      const { done, value } = await reader.read();

      // stream mode keeps multi-byte characters that straddle two chunks intact
      pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

      // events are separated by a blank line; a chunk may hold several events
      // or only part of one, so the unfinished tail is kept for the next read
      const rawEvents = pending.split(/\r?\n\r?\n/);
      pending = done ? '' : rawEvents.pop();

      for (const rawEvent of rawEvents) {
        const event = parseSseEvent(rawEvent);
        if (event === null) {
          continue;
        }

        // callback
        params.onmessage(event);

        // if we got a stop token from server, we will break here
        if (event.data.stop) {
          stopped = true;
          break;
        }
      }

      if (done) {
        break;
      }
    }
  } catch (e) {
    // an abort is a deliberate cancellation, not a failure
    if (e?.name === 'AbortError') {
      return;
    }
    console.error(e);
    throw e;
  } finally {
    abortController.abort();
    // only clear the shared slot if it still belongs to this request
    if (controller.value === abortController) {
      controller.value = null;
    }
  }
};
|
|
|
|
|
|
// Send a user message to the server and stream the reply into the transcript.
//
// msg - text typed by the user.
//
// Does nothing (apart from logging) when a request is already in flight.
// Otherwise the message is appended to the transcript, the prompt is built
// from chatTemplate, and the bot line in the transcript is rewritten as the
// reply grows. The returned promise resolves once the stream has finished;
// stream failures are logged here rather than surfacing as unhandled
// rejections.
const chat = async (msg) => {
  if (controller.value) {
    console.log('already running...');
    return;
  }
  transcript.value = [...transcript.value, ['{{user}}', msg]];

  const payload = template(chatTemplate.value, {
    message: msg,
    history: transcript.value.map(([name, text]) => `${name}: ${text}`).join("\n"),
  });

  let currentMessage = '';
  // transcript as it was before the bot started answering
  const history = transcript.value;

  try {
    await llamaCompletionStream({
      prompt: payload,
      onmessage: (message) => {
        const data = message.data;
        // a payload without content must not append the text "undefined"
        currentMessage += data.content ?? '';

        transcript.value = [...history, ["{{bot}}", currentMessage]];

        if (data.stop) {
          console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
        }
      }
    });
  } catch (e) {
    console.error('completion failed:', e);
  }
};
|
|
|
|
// Input row: a text box plus Send / Stop / Reset buttons.
function MessageInput() {
  const message = useSignal("");

  // Abort the in-flight request, if any, and release the controller slot.
  const stop = (e) => {
    e.preventDefault();
    if (controller.value) {
      controller.value.abort();
      controller.value = null;
    }
  };

  // Stop any running request and clear the transcript.
  const reset = (e) => {
    stop(e);
    transcript.value = [];
  };

  // Send the typed message and clear the text box.
  const submit = (e) => {
    stop(e);
    chat(message.value);
    message.value = "";
  };

  // `generating` is true while idle (controller is null), hence the negation
  // on Send and the direct use on Stop. Stop and Reset are type="button" so
  // they never act as submit buttons of the form.
  return html`
    <form onsubmit=${submit}>
      <input type="text" value="${message}" oninput=${(e) => message.value = e.target.value} autofocus placeholder="Chat here..."/>
      <button type="submit" disabled=${!generating.value} >Send</button>
      <button type="button" onclick=${stop} disabled=${generating}>Stop</button>
      <button type="button" onclick=${reset}>Reset</button>
    </form>
  `;
}
|
|
|
|
// Scrollable list of the transcript entries.
const ChatLog = (props) => {
  const messages = transcript.value;
  const container = useRef(null);

  useEffect(() => {
    // scroll to bottom (if needed): only when the view is already within
    // 100px of the end of the log
    const el = container.current;
    if (el && el.scrollHeight <= el.scrollTop + el.offsetHeight + 100) {
      el.scrollTo(0, el.scrollHeight);
    }
  }, [messages]);

  // Keyed by position: the message text changes with every streamed token and
  // identical messages may repeat, so the text itself is not a usable key.
  const chatLine = ([user, msg], index) => {
    return html`<p key=${index}><strong>${template(user)}:</strong> ${template(msg)}</p>`;
  };

  return html`
    <section id="chat" ref=${container}>
      ${messages.map(chatLine)}
    </section>`;
};
|
|
|
|
// Settings form: prompt, participant names, prompt template and the request
// parameters.
const ConfigForm = (props) => {
  // Build an input handler that stores the field's value under `key` in
  // settings. A new object is assigned instead of mutating the current one,
  // because a signal only notifies its subscribers when .value is assigned.
  const updateSetting = (key) => (e) => {
    settings.value = { ...settings.value, [key]: e.target.value };
  };

  return html`
    <form>
      <fieldset>
        <legend>Settings</legend>

        <div>
          <label for="prompt">Prompt</label>
          <textarea id="prompt" value="${settings.value.prompt}" oninput=${updateSetting('prompt')} rows="3" cols="60" />
        </div>

        <div>
          <label for="user">User name</label>
          <input type="text" id="user" value="${settings.value.user}" oninput=${updateSetting('user')} />
        </div>

        <div>
          <label for="bot">Bot name</label>
          <input type="text" id="bot" value="${settings.value.bot}" oninput=${updateSetting('bot')} />
        </div>

        <div>
          <label for="template">Prompt template</label>
          <textarea id="template" value="${chatTemplate}" oninput=${(e) => chatTemplate.value = e.target.value} rows="8" cols="60" />
        </div>

        <div>
          <label for="temperature">Temperature</label>
          <input type="range" id="temperature" min="0.0" max="1.0" step="0.01" value="${temperature.value}" oninput=${(e) => temperature.value = e.target.value} />
          <span>${temperature}</span>
        </div>

        <div>
          <label for="nPredict">Predictions</label>
          <input type="range" id="nPredict" min="1" max="2048" step="1" value="${nPredict.value}" oninput=${(e) => nPredict.value = e.target.value} />
          <span>${nPredict}</span>
        </div>
      </fieldset>
    </form>
  `;
};
|
|
|
|
// Root component: shows the settings form until the first message has been
// sent, then the chat log; the message input row is always visible.
function App(props) {
  return html`
    <div id="container">
      <header>
        <h1>llama.cpp</h1>
      </header>

      <main>
        <section class="chat">
          <${chatStarted.value ? ChatLog : ConfigForm} />
        </section>

        <hr/>

        <section class="chat">
          <${MessageInput} />
        </section>
      </main>

      <footer>
        <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p>
      </footer>
    </div>
  `;
}
|
|
|
|
// Mount the App component into the document body.
render(h(App), document.body);
|
|
</script>
|
|
</head>
|
|
|
|
<body>
|
|
</body>
|
|
|
|
</html>
|