server : fix the disappearance of the end of the text (#9867)

* server: fix the disappearance of the end of the text when streaming with stop strings

* simplify "send text" checks
This commit is contained in:
Alexey Parfenov 2024-10-16 08:35:53 +00:00 committed by GitHub
parent 0e41b300ed
commit 1f66b699c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1090,22 +1090,21 @@ struct server_context {
size_t pos = std::min(slot.n_sent_text, slot.generated_text.size()); size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
const std::string str_test = slot.generated_text.substr(pos); const std::string str_test = slot.generated_text.substr(pos);
bool is_stop_full = false; bool send_text = true;
size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL); size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL);
if (stop_pos != std::string::npos) { if (stop_pos != std::string::npos) {
is_stop_full = true;
slot.generated_text.erase( slot.generated_text.erase(
slot.generated_text.begin() + pos + stop_pos, slot.generated_text.begin() + pos + stop_pos,
slot.generated_text.end()); slot.generated_text.end());
pos = std::min(slot.n_sent_text, slot.generated_text.size()); pos = std::min(slot.n_sent_text, slot.generated_text.size());
} else { } else if (slot.has_next_token) {
is_stop_full = false;
stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL); stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL);
send_text = stop_pos == std::string::npos;
} }
// check if there is any token to predict // check if there is any token to predict
if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) { if (send_text) {
// do not send the stop word in the response // do not send the stop word in the response
result.text_to_send = slot.generated_text.substr(pos, std::string::npos); result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
slot.n_sent_text += result.text_to_send.size(); slot.n_sent_text += result.text_to_send.size();