From 5ef07e25ac39e62297a67208c5bcced50835a2dd Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 12 Aug 2024 10:21:50 +0300 Subject: [PATCH] server : handle models with missing EOS token (#8997) ggml-ci --- examples/server/server.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 360f571e4..1621c7c43 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -631,6 +631,7 @@ struct server_context { bool clean_kv_cache = true; bool add_bos_token = true; + bool has_eos_token = false; int32_t n_ctx; // total context for all clients / slots @@ -693,7 +694,7 @@ struct server_context { n_ctx = llama_n_ctx(ctx); add_bos_token = llama_should_add_bos_token(model); - GGML_ASSERT(llama_add_eos_token(model) != 1); + has_eos_token = llama_add_eos_token(model) != 1; return true; } @@ -1031,7 +1032,7 @@ struct server_context { { slot.sparams.logit_bias.clear(); - if (json_value(data, "ignore_eos", false)) { + if (json_value(data, "ignore_eos", false) && has_eos_token) { slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY; }