From e8b8d32e8663ffc55a02c9721af3a5190382cbb0 Mon Sep 17 00:00:00 2001
From: Jhen-Jie Hong
Date: Thu, 5 Oct 2023 09:02:55 -0500
Subject: [PATCH] server : fix incorrect num_tokens_predicted (#3480)

---
 examples/server/server.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 921eb5da4..6e31e1332 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -504,9 +504,11 @@ struct llama_server_context
             });
         }
 
+        bool tg = true;
         while (n_past < embd.size())
         {
             int n_eval = (int)embd.size() - n_past;
+            tg = n_eval == 1;
             if (n_eval > params.n_batch)
             {
                 n_eval = params.n_batch;
@@ -633,7 +635,9 @@ struct llama_server_context
 
             last_n_tokens.erase(last_n_tokens.begin());
             last_n_tokens.push_back(result.tok);
-            num_tokens_predicted++;
+            if (tg) {
+                num_tokens_predicted++;
+            }
         }
 
         // add it to the context
@@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
 {
     const auto timings = llama_get_timings(llama.ctx);
 
-    assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
-
     return json{
         {"prompt_n", timings.n_p_eval},
        {"prompt_ms", timings.t_p_eval_ms},
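
Note on the fix: llama.cpp's internal timings classify each eval call by batch
size, with a single-token call counted toward timings.n_eval (generation) and a
multi-token batch toward timings.n_p_eval (prompt processing). The server
incremented num_tokens_predicted once per sampled token regardless, so after
any batched (re)evaluation of context the counter diverged from timings.n_eval,
which is the condition the removed assert checked. The patch mirrors llama.cpp's
classification: tg records whether the final eval in the loop was single-token,
and only those samples are counted.

The following is a minimal, self-contained sketch of that counting rule, not
the server code itself: the llama_eval call is stubbed out, and the step()
driver, n_batch value, and token values are illustrative, not from the patch.

    // Sketch of the counting rule introduced above (real code lives in
    // examples/server/server.cpp inside llama_server_context).
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        const int n_batch = 4;                      // stand-in for params.n_batch
        std::vector<int> embd = {1, 2, 3, 4, 5, 6}; // tokens pending evaluation
        size_t n_past = 0;                          // tokens already in the KV cache
        size_t num_tokens_predicted = 0;

        // One server step: evaluate everything pending, then "sample" one token.
        auto step = [&](int sampled_tok) {
            bool tg = true;                   // "tg" = text generation
            while (n_past < embd.size()) {
                int n_eval = (int)embd.size() - n_past;
                tg = n_eval == 1;             // >1 token at once is prompt-style
                if (n_eval > n_batch) {       // processing, timed as n_p_eval,
                    n_eval = n_batch;         // not n_eval
                }
                // ... llama_eval(ctx, embd.data() + n_past, n_eval, n_past) ...
                n_past += n_eval;
            }
            embd.push_back(sampled_tok);
            if (tg) {
                num_tokens_predicted++;       // count only single-token evals so
            }                                 // the total tracks timings.n_eval
        };

        step(7); // a batched prompt eval precedes this sample: not counted
        step(8); // single-token evals precede these samples: counted
        step(9);
        printf("num_tokens_predicted = %zu\n", num_tokens_predicted); // prints 2
        return 0;
    }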