server : clear the KV cache beyond n_past before llama_decode

This commit is contained in:
Georgi Gerganov 2023-09-28 18:12:39 +03:00
parent 2b8830af71
commit ce2d995af2
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -434,6 +434,10 @@ struct llama_server_context
        {
            n_eval = params.n_batch;
        }
// since #3228 we now have to manually manage the KV cache
llama_kv_cache_tokens_rm(ctx, n_past, -1);
        if (llama_decode(ctx, llama_batch_get_one(&embd[n_past], n_eval, n_past, 0), params.n_threads))
        {
            LOG_ERROR("failed to eval", {