server : handle models with missing EOS token (#8997)

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-08-12 10:21:50 +03:00 committed by GitHub
parent 4134999e01
commit 5ef07e25ac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -631,6 +631,7 @@ struct server_context {
bool clean_kv_cache = true; bool clean_kv_cache = true;
bool add_bos_token = true; bool add_bos_token = true;
bool has_eos_token = false;
int32_t n_ctx; // total context for all clients / slots int32_t n_ctx; // total context for all clients / slots
@@ -693,7 +694,7 @@ struct server_context {
n_ctx = llama_n_ctx(ctx); n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_should_add_bos_token(model); add_bos_token = llama_should_add_bos_token(model);
GGML_ASSERT(llama_add_eos_token(model) != 1); has_eos_token = llama_add_eos_token(model) != 1;
return true; return true;
} }
@@ -1031,7 +1032,7 @@ struct server_context {
{ {
slot.sparams.logit_bias.clear(); slot.sparams.logit_bias.clear();
if (json_value(data, "ignore_eos", false)) { if (json_value(data, "ignore_eos", false) && has_eos_token) {
slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY; slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
} }