Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-01 08:49:00 +01:00)
server : fix token duplication when streaming with stop strings (#10997)
parent d79d8f39b4
commit 16cdce7b68
@@ -1856,6 +1856,8 @@ struct server_context {
             result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
             slot.n_sent_text += result.text_to_send.size();
             // add the token to slot queue and cache
+        } else {
+            result.text_to_send = "";
         }
 
         slot.add_token(result);
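For context, the duplication presumably arose when a token's text was held back because the tail of the generated text looked like the start of a stop string: result.text_to_send still carried the token's text even though slot.n_sent_text was not advanced, so the same characters could be streamed again once the partial match resolved. The added else branch clears result.text_to_send in that case. Below is a minimal, self-contained sketch of that idea, not the actual server code; Slot, process_piece, and the partial-match loop are hypothetical, simplified stand-ins.

// Minimal sketch of the streaming hold-back logic (hypothetical names,
// not llama.cpp's server implementation).
#include <algorithm>
#include <iostream>
#include <string>

struct Slot {
    std::string generated_text;  // everything generated so far
    size_t      n_sent_text = 0; // number of characters already streamed
};

// Decide what to stream for a newly generated piece of text.
// If the tail of generated_text could be the start of the stop string,
// nothing is streamed yet; returning an empty string here mirrors what
// the commit's added else branch does for result.text_to_send.
std::string process_piece(Slot & slot, const std::string & piece, const std::string & stop) {
    slot.generated_text += piece;

    // check whether the tail of generated_text partially matches `stop`
    bool partial_stop = false;
    for (size_t n = std::min(stop.size() - 1, slot.generated_text.size()); n > 0; --n) {
        if (slot.generated_text.compare(slot.generated_text.size() - n, n, stop, 0, n) == 0) {
            partial_stop = true;
            break;
        }
    }

    std::string text_to_send;
    if (!partial_stop) {
        // flush everything that has not been sent yet and advance the cursor
        text_to_send = slot.generated_text.substr(slot.n_sent_text);
        slot.n_sent_text += text_to_send.size();
    } else {
        // hold the text back; without clearing it, the same characters would
        // be streamed now and then again once the partial match resolves
        text_to_send = "";
    }
    return text_to_send;
}

int main() {
    Slot slot;
    const std::string stop = "</s>";
    // "ld<" triggers a partial match of "</s>" and is held back, then
    // flushed together with the next piece once the match falls through
    for (const std::string piece : {"Hello", " wor", "ld<", "and more"}) {
        std::cout << process_piece(slot, piece, stop);
    }
    std::cout << "\n"; // prints: Hello world<and more
}

The invariant in this sketch is that the sent-text cursor only advances for text that was actually emitted, so a held-back piece must also produce empty output; the two added lines in the diff appear to enforce exactly that pairing.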