server : fix token duplication when streaming with stop strings (#10997)

This commit is contained in:
Alexey Parfenov 2024-12-28 15:08:54 +00:00 committed by GitHub
parent d79d8f39b4
commit 16cdce7b68
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1856,6 +1856,8 @@ struct server_context {
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
slot.n_sent_text += result.text_to_send.size();
// add the token to slot queue and cache
} else {
result.text_to_send = "";
}
slot.add_token(result);