mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-23 21:17:54 +01:00
server : fix incorrect num_tokens_predicted (#3480)
This commit is contained in:
parent
8f3a642ec1
commit
e8b8d32e86
@ -504,9 +504,11 @@ struct llama_server_context
|
||||
});
|
||||
}
|
||||
|
||||
bool tg = true;
|
||||
while (n_past < embd.size())
|
||||
{
|
||||
int n_eval = (int)embd.size() - n_past;
|
||||
tg = n_eval == 1;
|
||||
if (n_eval > params.n_batch)
|
||||
{
|
||||
n_eval = params.n_batch;
|
||||
@ -633,7 +635,9 @@ struct llama_server_context
|
||||
|
||||
last_n_tokens.erase(last_n_tokens.begin());
|
||||
last_n_tokens.push_back(result.tok);
|
||||
num_tokens_predicted++;
|
||||
if (tg) {
|
||||
num_tokens_predicted++;
|
||||
}
|
||||
}
|
||||
|
||||
// add it to the context
|
||||
@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
|
||||
{
|
||||
const auto timings = llama_get_timings(llama.ctx);
|
||||
|
||||
assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
|
||||
|
||||
return json{
|
||||
{"prompt_n", timings.n_p_eval},
|
||||
{"prompt_ms", timings.t_p_eval_ms},
|
||||
|
Loading…
Reference in New Issue
Block a user