mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 06:39:25 +01:00
parent
4134999e01
commit
5ef07e25ac
@ -631,6 +631,7 @@ struct server_context {
|
|||||||
|
|
||||||
bool clean_kv_cache = true;
|
bool clean_kv_cache = true;
|
||||||
bool add_bos_token = true;
|
bool add_bos_token = true;
|
||||||
|
bool has_eos_token = false;
|
||||||
|
|
||||||
int32_t n_ctx; // total context for all clients / slots
|
int32_t n_ctx; // total context for all clients / slots
|
||||||
|
|
||||||
@ -693,7 +694,7 @@ struct server_context {
|
|||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_should_add_bos_token(model);
|
add_bos_token = llama_should_add_bos_token(model);
|
||||||
GGML_ASSERT(llama_add_eos_token(model) != 1);
|
has_eos_token = llama_add_eos_token(model) != 1;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1031,7 +1032,7 @@ struct server_context {
|
|||||||
{
|
{
|
||||||
slot.sparams.logit_bias.clear();
|
slot.sparams.logit_bias.clear();
|
||||||
|
|
||||||
if (json_value(data, "ignore_eos", false)) {
|
if (json_value(data, "ignore_eos", false) && has_eos_token) {
|
||||||
slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user