From 33d7b70c881b1f9ebabd381daf3ad9cb283ce028 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 3 Dec 2024 10:58:43 +0200
Subject: [PATCH] server : do not speculate during prompt processing

ggml-ci
---
 examples/server/server.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8eca14b86..b91671de6 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2322,6 +2322,10 @@ struct server_context {
                     continue;
                 }
 
+                if (slot.state != SLOT_STATE_GENERATING) {
+                    continue;
+                }
+
                 llama_token id = slot.sampled;
 
                 struct common_speculative_params params_spec;