mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-02-01 14:42:29 +01:00
server : fix token duplication when streaming with stop strings (#10997)
This commit is contained in:
parent
d79d8f39b4
commit
16cdce7b68
@ -1856,6 +1856,8 @@ struct server_context {
|
|||||||
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
|
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
|
||||||
slot.n_sent_text += result.text_to_send.size();
|
slot.n_sent_text += result.text_to_send.size();
|
||||||
// add the token to slot queue and cache
|
// add the token to slot queue and cache
|
||||||
|
} else {
|
||||||
|
result.text_to_send = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
slot.add_token(result);
|
slot.add_token(result);
|
||||||
|
Loading…
Reference in New Issue
Block a user