server : fix n_predict check (#4798)

This commit is contained in:
Georgi Gerganov 2024-01-07 08:45:26 +02:00 committed by GitHub
parent c75ca5d96f
commit 67984921a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -447,8 +447,14 @@ struct llama_client_slot
  }
  bool has_budget(gpt_params &global_params) {
+     if (params.n_predict == -1 && global_params.n_predict == -1)
+     {
+         return true; // limitless
+     }
      n_remaining = -1;
-     if(params.n_predict != -1)
+     if (params.n_predict != -1)
      {
          n_remaining = params.n_predict - n_decoded;
      }
@@ -456,7 +462,8 @@ struct llama_client_slot
      {
          n_remaining = global_params.n_predict - n_decoded;
      }
-     return n_remaining > 0 || n_remaining == -1; // no budget || limitless
+     return n_remaining > 0; // no budget
  }
  bool available() const {
@@ -1102,7 +1109,7 @@ struct llama_server_context
  }
  // check the limits
- if (slot.n_decoded > 2 && slot.has_next_token && !slot.has_budget(params))
+ if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params))
  {
      slot.stopped_limit = true;
      slot.has_next_token = false;
@@ -1703,7 +1710,6 @@ struct llama_server_context
  llama_batch_add(batch, slot.sampled, system_tokens.size() + slot.n_past, { slot.id }, true);
- slot.n_decoded += 1;
  slot.n_past += 1;
  }
@@ -1921,6 +1927,7 @@ struct llama_server_context
  llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
+ slot.n_decoded += 1;
  if (slot.n_decoded == 1)
  {
      slot.t_start_genereration = ggml_time_us();