mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 11:23:56 +01:00
embed index and add --path for choosing static dir
This commit is contained in:
parent
e3fba85d14
commit
022bf2bb48
16
examples/server/deps.sh
Executable file
16
examples/server/deps.sh
Executable file
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Download and update deps for binary
#
# Regenerates the files that embed the web UI into the server binary:
#   public/index.js  - bundled JS dependencies downloaded from npm.reversehttp.com
#   index.html.cpp   - public/index.html wrapped in a C++ raw string literal
#   index.js.cpp     - public/index.js wrapped in a C++ raw string literal

# Stop at the first failing command so a broken download is never embedded.
set -euo pipefail

# get the directory of this script file
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
PUBLIC="$DIR/public"

DEPS_URL='https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks,@microsoft/fetch-event-source'

# Download into a temporary file first: the existing bundle is only replaced
# after curl succeeded (--fail turns HTTP errors into a non-zero exit status)
# and the result is non-empty.
tmp_js="$(mktemp)"
trap 'rm -f "$tmp_js"' EXIT
curl --fail --location --silent --show-error "$DEPS_URL" -o "$tmp_js"
if [ ! -s "$tmp_js" ]; then
    echo "deps.sh: downloaded bundle is empty, keeping existing index.js" >&2
    exit 1
fi
cat "$tmp_js" > "$PUBLIC/index.js"

# embed_raw DELIM SRC DST
# Writes DST containing SRC wrapped as the C++ raw string literal
# R"DELIM(...)DELIM", preceded by a "generated file" banner.
embed_raw() {
    local delim="$1" src="$2" dst="$3"

    if [ ! -r "$src" ]; then
        echo "deps.sh: cannot read $src" >&2
        return 1
    fi

    # $(cat ...) strips trailing newlines from the content; the format string
    # reproduces the layout of the previous echo-based generator exactly.
    printf '%s\nR"%s(%s)%s"\n\n' \
        "// Generated file, run deps.sh to update. Do not edit directly" \
        "$delim" "$(cat "$src")" "$delim" > "$dst"
}

embed_raw htmlraw "$PUBLIC/index.html" "$DIR/index.html.cpp"
embed_raw jsraw   "$PUBLIC/index.js"   "$DIR/index.js.cpp"
|
278
examples/server/index.html.cpp
Normal file
278
examples/server/index.html.cpp
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
// Generated file, run deps.sh to update. Do not edit directly
|
||||||
|
R"htmlraw(<html>
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>llama.cpp - chat</title>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
#container {
|
||||||
|
max-width: 80rem;
|
||||||
|
margin: 4em auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
main {
|
||||||
|
border: 1px solid #ddd;
|
||||||
|
padding: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
#chat {
|
||||||
|
height: 50vh;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
max-width: 650px;
|
||||||
|
line-height: 1.2;
|
||||||
|
font-size: 16px;
|
||||||
|
margin: 0 auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
p {
|
||||||
|
overflow-wrap: break-word;
|
||||||
|
word-wrap: break-word;
|
||||||
|
hyphens: auto;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
form {
|
||||||
|
margin: 1em 0 0 0;
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5em;
|
||||||
|
flex-direction: row;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
form > * {
|
||||||
|
padding: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
form input {
|
||||||
|
flex-grow: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldset {
|
||||||
|
width: 100%;
|
||||||
|
padding: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldset label {
|
||||||
|
margin: 0.5em 0;
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
||||||
|
|
||||||
|
<script type="module">
|
||||||
|
import {
|
||||||
|
html, h, signal, effect, computed, render, useSignal, useEffect, useRef, fetchEventSource
|
||||||
|
} from '/index.js';
|
||||||
|
|
||||||
|
// Conversation so far: a list of [speaker, message] pairs, where speaker is a
// template placeholder such as '{{user}}' or '{{bot}}'.
const transcript = signal([])
|
||||||
|
// True once at least one message exists in the transcript.
const chatStarted = computed(() => transcript.value.length > 0)
|
||||||
|
|
||||||
|
// Prompt layout sent to the server; {{...}} placeholders are expanded by template().
const chatTemplate = signal("{{prompt}}\n\n{{history}}\n{{bot}}:")
|
||||||
|
// Values substituted for the {{prompt}}, {{bot}} and {{user}} placeholders.
const settings = signal({
|
||||||
|
prompt: "This is a conversation between user and llama, a friendly chatbot.",
|
||||||
|
bot: "llama",
|
||||||
|
user: "User"
|
||||||
|
})
|
||||||
|
|
||||||
|
// Sampling temperature sent with each /completion request.
const temperature = signal(0.2)
|
||||||
|
// Sent as n_predict with each /completion request.
const nPredict = signal(80)
|
||||||
|
// AbortController of the in-flight request, or null when no request is running.
const controller = signal(null)
|
||||||
|
// NOTE(review): despite its name this is true when NO request is in flight
// (controller is null); the Send/Stop buttons negate it accordingly.
const generating = computed(() => controller.value == null )
|
||||||
|
|
||||||
|
// simple template replace
//
// Expands every `{{key}}` placeholder found in `str`.
//   str  - value to expand; it is converted with String() first.
//   map  - optional extra values; they are merged over `settings.value` for
//          the top-level lookup only. Nested expansions (placeholders inside a
//          substituted value) are resolved against `settings.value` alone.
//   seen - internal: keys currently being expanded, used to stop cycles.
// Returns the expanded string. Placeholders whose key is unknown, or that
// would recurse into a key that is already being expanded, are left as-is.
const template = (str, map, seen = []) => {
  const params = map ? { ...settings.value, ...map } : settings.value;

  return String(str).replaceAll(/\{\{(.*?)\}\}/g, (placeholder, key) => {
    // Only own properties count; an unknown key used to be rendered as the
    // literal text "undefined", and inherited names (e.g. "constructor")
    // resolved to prototype members.
    const known = Object.prototype.hasOwnProperty.call(params, key);

    // A value that (directly or indirectly) references its own key would
    // otherwise recurse until the call stack overflows.
    if (!known || seen.includes(key)) {
      return placeholder;
    }

    return template(params[key], undefined, [...seen, key]);
  });
}
|
||||||
|
|
||||||
|
// send message to server
//
// Appends `msg` to the transcript as a user turn, renders the prompt from the
// chat template and streams the completion from POST /completion. While chunks
// arrive the transcript is updated with the partial bot reply. Does nothing
// (apart from logging) when a request is already in flight.
const chat = async (msg) => {
  if (controller.value) {
    console.log('already running...');
    return;
  }

  // Keep a local handle: stop() may clear `controller.value`, and a later
  // chat() call may install a new controller while this request is still
  // unwinding. Only this request's own controller may be cleared below.
  const requestController = new AbortController();
  controller.value = requestController;

  const releaseController = () => {
    if (controller.value === requestController) {
      controller.value = null;
    }
  };

  const history = [...transcript.value, ['{{user}}', msg]];
  transcript.value = history;

  const additionalParams = {
    message: msg,
    history: history.map(([name, text]) => `${name}: ${text}`).join("\n"),
  };

  const payload = template(chatTemplate.value, additionalParams);

  let currentMessage = "";

  try {
    await fetchEventSource('/completion', {
      method: 'POST',
      signal: requestController.signal,
      body: JSON.stringify({
        stream: true,
        prompt: payload,
        n_predict: parseInt(nPredict.value),
        temperature: parseFloat(temperature.value),
        stop: ["</s>", template("{{bot}}:"), template("{{user}}:")]
      }),
      onmessage(e) {
        const data = JSON.parse(e.data);
        currentMessage += data.content;

        if (data.stop) {
          console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
        }

        transcript.value = [...history, ['{{bot}}', currentMessage]]
        return true;
      },
      onclose(e) {
        releaseController();
        return false;
      },
      onerror(err) {
        // Without an onerror handler fetchEventSource treats every failure as
        // retriable and re-sends the POST after a delay; rethrowing stops
        // that so a completion is never requested twice behind the user's back.
        throw err;
      },
    });
  } catch (err) {
    console.error('completion request failed:', err);
  } finally {
    // Also covers failures and aborts, where onclose is never invoked.
    releaseController();
  }
}
|
||||||
|
|
||||||
|
// Message box with the Send / Stop / Reset buttons shown below the chat area.
function MessageInput() {
  const message = useSignal("")

  // Cancel the in-flight request, if there is one, and mark the chat as idle.
  const stop = (event) => {
    event.preventDefault();

    const active = controller.value;
    if (!active) {
      return;
    }
    active.abort();
    controller.value = null;
  }

  // Cancel any running request and start over with an empty transcript.
  const reset = (event) => {
    stop(event);
    transcript.value = [];
  }

  // Cancel any running request, send the typed text and clear the input box.
  const submit = (event) => {
    stop(event);
    const text = message.value;
    chat(text);
    message.value = "";
  }

  // Mirror the text box content into the local signal on every keystroke.
  const onInput = (event) => {
    message.value = event.target.value;
  }

  // `generating.value` is true while no request is running, hence the
  // negation on the Send button and the plain value on the Stop button.
  return html`
    <form onsubmit=${submit}>
      <input type="text" value="${message}" oninput=${onInput} autofocus placeholder="Chat here..."/>
      <button type="submit" disabled=${!generating.value} >Send</button>
      <button onclick=${stop} disabled=${generating.value}>Stop</button>
      <button onclick=${reset}>Reset</button>
    </form>
  `
}
|
||||||
|
|
||||||
|
// Scrollable list of all transcript entries, one paragraph per message.
const ChatLog = (props) => {
  const messages = transcript.value;
  const container = useRef(null)

  // After the transcript changes, follow the newest message, but only when
  // the view is already within 100px of the bottom.
  useEffect(() => {
    const box = container.current;
    if (!box) {
      return;
    }

    const visibleBottom = box.scrollTop + box.offsetHeight;
    if (box.scrollHeight <= visibleBottom + 100) {
      box.scrollTo(0, box.scrollHeight)
    }
  }, [messages])

  // Render one [speaker, text] pair; both parts go through template() so
  // placeholders such as {{user}} show the configured names.
  const chatLine = (entry) => {
    const [user, msg] = entry;
    const speaker = template(user, {});
    const text = template(msg, {});
    return html`<p><strong>${speaker}:</strong> ${text}</p>`
  };

  const lines = messages.map((entry) => chatLine(entry));

  return html`
    <section id="chat" ref=${container}>
      ${lines}
    </section>`;
};
|
||||||
|
|
||||||
|
// Settings form shown before the chat starts: prompt text, user and bot
// names, the prompt template, temperature and number of tokens to predict.
const ConfigForm = (props) => {
  // Returns an input handler that stores the field's text under `key`.
  // A new object is assigned to `settings.value` instead of mutating the
  // current one in place: a signal only notifies its subscribers when
  // `.value` itself is assigned, so in-place edits would go unnoticed by
  // anything rendering from `settings`.
  const updateSetting = (key) => (e) => {
    settings.value = { ...settings.value, [key]: e.target.value };
  };

  return html`
    <form>
      <fieldset>
        <legend>Settings</legend>

        <div>
          <label for="prompt">Prompt</label>
          <textarea type="text" id="prompt" value="${settings.value.prompt}" oninput=${updateSetting('prompt')} rows="3" cols="60" />
        </div>

        <div>
          <label for="user">User name</label>
          <input type="text" id="user" value="${settings.value.user}" oninput=${updateSetting('user')} />
        </div>

        <div>
          <label for="bot">Bot name</label>
          <input type="text" id="bot" value="${settings.value.bot}" oninput=${updateSetting('bot')} />
        </div>

        <div>
          <label for="template">Prompt template</label>
          <textarea id="template" value="${chatTemplate}" oninput=${(e) => chatTemplate.value = e.target.value} rows="8" cols="60" />
        </div>

        <div>
          <label for="temperature">Temperature</label>
          <input type="range" id="temperature" min="0.0" max="1.0" step="0.01" value="${temperature.value}" oninput=${(e) => temperature.value = e.target.value} />
          <span>${temperature}</span>
        </div>

        <div>
          <label for="nPredict">Predictions</label>
          <input type="range" id="nPredict" min="1" max="2048" step="1" value="${nPredict.value}" oninput=${(e) => nPredict.value = e.target.value} />
          <span>${nPredict}</span>
        </div>
      </fieldset>

    </form>
  `

}
|
||||||
|
|
||||||
|
// Root component: page header, the main panel, the message input and footer.
function App(props) {
  // Before the first message the settings form is shown; afterwards the
  // chat log takes its place.
  const MainPanel = chatStarted.value ? ChatLog : ConfigForm;

  return html`
    <div id="container">
      <header>
        <h1>llama.cpp</h1>
      </header>

      <main>
        <section class="chat">
          <${MainPanel} />
        </section >

        <hr/>

        <section class="chat">
          <${MessageInput} />
        </section>

      </main >
      <footer>
        <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p>
      </footer>
    </div>
  `;
}
|
||||||
|
|
||||||
|
render(h(App), document.body);
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>)htmlraw"
|
||||||
|
|
1
examples/server/index.js
Normal file
1
examples/server/index.js
Normal file
File diff suppressed because one or more lines are too long
3
examples/server/index.js.cpp
Normal file
3
examples/server/index.js.cpp
Normal file
File diff suppressed because one or more lines are too long
@ -67,7 +67,7 @@
|
|||||||
<script type="module">
|
<script type="module">
|
||||||
import {
|
import {
|
||||||
html, h, signal, effect, computed, render, useSignal, useEffect, useRef, fetchEventSource
|
html, h, signal, effect, computed, render, useSignal, useEffect, useRef, fetchEventSource
|
||||||
} from 'https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks,@microsoft/fetch-event-source';
|
} from '/index.js';
|
||||||
|
|
||||||
const transcript = signal([])
|
const transcript = signal([])
|
||||||
const chatStarted = computed(() => transcript.value.length > 0)
|
const chatStarted = computed(() => transcript.value.length > 0)
|
1
examples/server/public/index.js
Normal file
1
examples/server/public/index.js
Normal file
File diff suppressed because one or more lines are too long
@ -12,6 +12,15 @@
|
|||||||
#include "httplib.h"
|
#include "httplib.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
|
|
||||||
|
// auto generated files (update with ./deps.sh)
|
||||||
|
const char* indexHtml =
|
||||||
|
#include "index.html.cpp"
|
||||||
|
;
|
||||||
|
const char* indexJs =
|
||||||
|
#include "index.js.cpp"
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
#ifndef SERVER_VERBOSE
|
#ifndef SERVER_VERBOSE
|
||||||
#define SERVER_VERBOSE 1
|
#define SERVER_VERBOSE 1
|
||||||
#endif
|
#endif
|
||||||
@ -21,9 +30,11 @@ using json = nlohmann::json;
|
|||||||
|
|
||||||
struct server_params {
|
struct server_params {
|
||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
|
std::string public_path = "examples/server/public";
|
||||||
int32_t port = 8080;
|
int32_t port = 8080;
|
||||||
int32_t read_timeout = 600;
|
int32_t read_timeout = 600;
|
||||||
int32_t write_timeout = 600;
|
int32_t write_timeout = 600;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// completion token output with probabilities
|
// completion token output with probabilities
|
||||||
@ -539,6 +550,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params,
|
|||||||
fprintf(stderr, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
|
fprintf(stderr, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
|
||||||
fprintf(stderr, " --host ip address to listen (default (default: %s)\n", sparams.hostname.c_str());
|
fprintf(stderr, " --host ip address to listen (default (default: %s)\n", sparams.hostname.c_str());
|
||||||
fprintf(stderr, " --port PORT port to listen (default (default: %d)\n", sparams.port);
|
fprintf(stderr, " --port PORT port to listen (default (default: %d)\n", sparams.port);
|
||||||
|
fprintf(stderr, " --path PUBLIC_PATH path from which to serve static files (default %s)\n", sparams.public_path.c_str());
|
||||||
fprintf(stderr, " -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
|
fprintf(stderr, " -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
|
||||||
fprintf(stderr, " --embedding enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
|
fprintf(stderr, " --embedding enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
@ -565,6 +577,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
sparams.hostname = argv[i];
|
sparams.hostname = argv[i];
|
||||||
|
} else if (arg == "--path") {
|
||||||
|
if (++i >= argc) {
|
||||||
|
invalid_param = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sparams.public_path = argv[i];
|
||||||
} else if (arg == "--timeout" || arg == "-to") {
|
} else if (arg == "--timeout" || arg == "-to") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
@ -846,7 +864,7 @@ static void log_server_request(const Request & req, const Response & res) {
|
|||||||
{ "status", res.status },
|
{ "status", res.status },
|
||||||
{ "path", req.path },
|
{ "path", req.path },
|
||||||
{ "request", req.body },
|
{ "request", req.body },
|
||||||
{ "response", res.body },
|
// { "response", res.body },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -888,14 +906,15 @@ int main(int argc, char ** argv) {
|
|||||||
{ "Access-Control-Allow-Headers", "content-type" }
|
{ "Access-Control-Allow-Headers", "content-type" }
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// this is only called if no index.js is found in the public --path
|
||||||
|
svr.Get("/index.js", [](const Request &, Response & res) {
|
||||||
|
res.set_content(indexJs, "text/javascript");
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
// this is only called if no index.html is found in the public --path
|
||||||
svr.Get("/", [](const Request &, Response & res) {
|
svr.Get("/", [](const Request &, Response & res) {
|
||||||
// return content of server.html file
|
res.set_content(indexHtml, "text/html");
|
||||||
|
|
||||||
std::ifstream t("examples/server/server.html");
|
|
||||||
std::stringstream buffer;
|
|
||||||
buffer << t.rdbuf();
|
|
||||||
|
|
||||||
res.set_content(buffer.str(), "text/html");
|
|
||||||
return false;
|
return false;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -1051,6 +1070,9 @@ int main(int argc, char ** argv) {
|
|||||||
svr.set_read_timeout(sparams.read_timeout);
|
svr.set_read_timeout(sparams.read_timeout);
|
||||||
svr.set_write_timeout(sparams.write_timeout);
|
svr.set_write_timeout(sparams.write_timeout);
|
||||||
|
|
||||||
|
// Set the base directory for serving static files
|
||||||
|
svr.set_base_dir(sparams.public_path);
|
||||||
|
|
||||||
if (!svr.bind_to_port(sparams.hostname, sparams.port)) {
|
if (!svr.bind_to_port(sparams.hostname, sparams.port)) {
|
||||||
LOG_ERROR("couldn't bind to server socket", {
|
LOG_ERROR("couldn't bind to server socket", {
|
||||||
{ "hostname", sparams.hostname },
|
{ "hostname", sparams.hostname },
|
||||||
@ -1059,10 +1081,9 @@ int main(int argc, char ** argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO("HTTP server listening", {
|
std::cout << std::endl;
|
||||||
{ "hostname", sparams.hostname },
|
std::cout << "llama server listening at http://" << sparams.hostname << ":" << sparams.port << std::endl;
|
||||||
{ "port", sparams.port },
|
std::cout << std::endl;
|
||||||
});
|
|
||||||
|
|
||||||
if (!svr.listen_after_bind()) {
|
if (!svr.listen_after_bind()) {
|
||||||
return 1;
|
return 1;
|
||||||
|
Loading…
Reference in New Issue
Block a user