mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-10-30 14:40:16 +01:00
server : fix crash with multimodal models without BOS token (#4904)
This commit is contained in:
parent
15ebe59210
commit
ee8243adaa
@ -1835,7 +1835,7 @@ struct llama_server_context
|
||||
|
||||
slot.cache_tokens = prompt_tokens;
|
||||
|
||||
if (slot.n_past == slot.num_prompt_tokens)
|
||||
if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
|
||||
{
|
||||
// we have to evaluate at least 1 token to generate logits.
|
||||
LOG_TEE("slot %d : we have to evaluate at least 1 token to generate logits\n", slot.id);
|
||||
|
Loading…
Reference in New Issue
Block a user