server : fix draft context not being released (#11354)

Diego Devesa 2025-01-22 17:44:40 +01:00 committed by GitHub
parent c64d2becb1
commit 12c2bdf2de


@@ -1772,6 +1772,9 @@ struct server_context {
             // force F16 KV cache for the draft model for extra performance
             cparams_dft.type_k = GGML_TYPE_F16;
             cparams_dft.type_v = GGML_TYPE_F16;
+
+            // the context is not needed - we will create one for each slot
+            llama_init_dft.context.reset();
         }

         chat_templates = common_chat_templates_from_model(model, params_base.chat_template);