server : handle models with missing EOS token (#8997)

ggml-ci
2024-12-26 14:20:31 +01:00 · 2024-08-12 10:21:50 +03:00 · 2024-08-12 10:21:50 +03:00 · 5ef07e25ac
commit 5ef07e25ac
parent 4134999e01
1 changed file with 3 additions and 2 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -631,6 +631,7 @@ struct server_context {

    bool clean_kv_cache = true;
    bool add_bos_token  = true;
+    bool has_eos_token  = false;

    int32_t n_ctx; // total context for all clients / slots

@@ -693,7 +694,7 @@ struct server_context {
        n_ctx = llama_n_ctx(ctx);

        add_bos_token = llama_should_add_bos_token(model);
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        has_eos_token = llama_add_eos_token(model) != 1;

        return true;
    }
@@ -1031,7 +1032,7 @@ struct server_context {
        {
            slot.sparams.logit_bias.clear();

-            if (json_value(data, "ignore_eos", false)) {
+            if (json_value(data, "ignore_eos", false) && has_eos_token) {
                slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
            }