Fixing race condition in server and partial stream handling in frontend. (#2391)

* Fixing race condition in server.cpp and partial stream handling in completion.js

* Reverting assert edits.

* Adding newline at EOF
This commit is contained in:
Stephen Nichols 2023-08-04 06:37:24 -05:00 committed by GitHub
parent 415e99fec2
commit 5f631c2679
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 18 deletions

View File

@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
const decoder = new TextDecoder(); const decoder = new TextDecoder();
let content = ""; let content = "";
let leftover = ""; // Buffer for partially read lines
try { try {
let cont = true; let cont = true;
@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
break; break;
} }
// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we // Add any leftover data to the current chunk of data
// mainly care about the data: key here, which we expect as json const text = leftover + decoder.decode(result.value);
const text = decoder.decode(result.value);
// parse all sse events and add them to result // Check if the last character is a line break
const regex = /^(\S+):\s(.*)$/gm; const endsWithLineBreak = text.endsWith('\n');
for (const match of text.matchAll(regex)) {
result[match[1]] = match[2] // Split the text into lines
let lines = text.split('\n');
// If the text doesn't end with a line break, then the last line is incomplete
// Store it in leftover to be added to the next chunk of data
if (!endsWithLineBreak) {
leftover = lines.pop();
} else {
leftover = ""; // Reset leftover if we have a line break at the end
} }
// since we know this is llama.cpp, let's just decode the json in data // Parse all sse events and add them to result
result.data = JSON.parse(result.data); const regex = /^(\S+):\s(.*)$/gm;
content += result.data.content; for (const line of lines) {
const match = regex.exec(line);
if (match) {
result[match[1]] = match[2]
// since we know this is llama.cpp, let's just decode the json in data
if (result.data) {
result.data = JSON.parse(result.data);
content += result.data.content;
// yield // yield
yield result; yield result;
// if we got a stop token from server, we will break here // if we got a stop token from server, we will break here
if (result.data.stop) { if (result.data.stop) {
if (result.data.generation_settings) { if (result.data.generation_settings) {
generation_settings = result.data.generation_settings; generation_settings = result.data.generation_settings;
}
cont = false;
break;
}
}
} }
break;
} }
} }
} catch (e) { } catch (e) {

View File

@ -1274,7 +1274,11 @@ int main(int argc, char **argv)
sink.done(); sink.done();
return true; return true;
}; };
res.set_chunked_content_provider("text/event-stream", chunked_content_provider); const auto on_complete = [&](bool) {
llama.mutex.unlock();
};
lock.release();
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
} }); } });
svr.Get("/model.json", [&llama](const Request &, Response &res) svr.Get("/model.json", [&llama](const Request &, Response &res)