Fixing race condition in server and partial stream handling in frontend. (#2391)

* Fixing race condition in server.cpp and partial stream handling in completion.js
* Reverting assert edits.
* Adding newline to eof
commit 5f631c2679
parent 415e99fec2
completion.js
@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
   const decoder = new TextDecoder();
 
   let content = "";
+  let leftover = ""; // Buffer for partially read lines
 
   try {
     let cont = true;
@@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
         break;
       }
 
-      // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
-      // mainly care about the data: key here, which we expect as json
-      const text = decoder.decode(result.value);
-
-      // parse all sse events and add them to result
-      const regex = /^(\S+):\s(.*)$/gm;
-      for (const match of text.matchAll(regex)) {
-        result[match[1]] = match[2]
-      }
-
-      // since we know this is llama.cpp, let's just decode the json in data
-      result.data = JSON.parse(result.data);
-      content += result.data.content;
-
-      // yield
-      yield result;
-
-      // if we got a stop token from server, we will break here
-      if (result.data.stop) {
-        if (result.data.generation_settings) {
-          generation_settings = result.data.generation_settings;
-        }
-        cont = false;
-        break;
-      }
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);
+
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');
+
+      // Split the text into lines
+      let lines = text.split('\n');
+
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
+      }
+
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2]
+          // since we know this is llama.cpp, let's just decode the json in data
+          if (result.data) {
+            result.data = JSON.parse(result.data);
+            content += result.data.content;
+
+            // yield
+            yield result;
+
+            // if we got a stop token from server, we will break here
+            if (result.data.stop) {
+              if (result.data.generation_settings) {
+                generation_settings = result.data.generation_settings;
+              }
+              cont = false;
+              break;
+            }
+          }
+        }
+      }
     }
   } catch (e) {
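The heart of the completion.js change is the leftover buffer: reader.read() returns raw chunks that can end mid-line, so running the SSE regex over each chunk in isolation can hand JSON.parse a truncated data: payload. The following is a minimal standalone sketch of the same technique; the chunk boundaries are hypothetical, chosen to split one SSE line in half, and only the split/pop/carry logic mirrors the patch.

// Hypothetical chunk boundaries: a real network read can cut an SSE
// line at any byte, e.g. in the middle of a data: JSON payload.
const chunks = [
  'data: {"content": "Hel',              // read ends mid-line
  'lo"}\ndata: {"content": " world"}\n', // next read completes it
];

let leftover = "";
for (const chunk of chunks) {
  // Prepend whatever was cut off at the end of the previous chunk
  const text = leftover + chunk;
  const lines = text.split('\n');
  // No trailing newline means the last line is incomplete: hold it
  // back for the next read instead of parsing it now
  leftover = text.endsWith('\n') ? "" : lines.pop();
  for (const line of lines) {
    const match = /^(\S+):\s(.*)$/.exec(line);
    if (match && match[1] === 'data') {
      console.log(JSON.parse(match[2]).content);
    }
  }
}
// Prints "Hello" then " world"; the naive per-chunk version would have
// thrown on JSON.parse('{"content": "Hel').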
server.cpp
@@ -1274,7 +1274,11 @@ int main(int argc, char **argv)
                 sink.done();
                 return true;
             };
-            res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
+            const auto on_complete = [&](bool) {
+                llama.mutex.unlock();
+            };
+            lock.release();
+            res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         } });

     svr.Get("/model.json", [&llama](const Request &, Response &res)