Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-02-05 08:00:42 +01:00)
server : fix draft context not being released (#11354)
commit 12c2bdf2de
parent c64d2becb1
@@ -1772,6 +1772,9 @@ struct server_context {
             // force F16 KV cache for the draft model for extra performance
             cparams_dft.type_k = GGML_TYPE_F16;
             cparams_dft.type_v = GGML_TYPE_F16;
+
+            // the context is not needed - we will create one for each slot
+            llama_init_dft.context.reset();
         }
 
         chat_templates = common_chat_templates_from_model(model, params_base.chat_template);
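The fix: the `llama_context` created while loading the draft model was kept alive for the server's lifetime even though each slot creates its own draft context, so its memory (including the KV cache) was never released. Since `common_init_result::context` is a smart-pointer wrapper whose deleter calls `llama_free`, `reset()` frees the redundant context immediately.

Below is a minimal sketch of the per-slot pattern the added comment describes. The `Slot` struct, `slot_init_draft`, and `slot_free_draft` are hypothetical names for illustration, not the server's actual code; only `llama_new_context_with_model`, `llama_free`, and the `cparams` fields are real llama.cpp API.

```cpp
#include "llama.h"

// Hypothetical per-slot state; the real server_slot struct is more involved.
struct Slot {
    llama_context * ctx_dft = nullptr; // draft context owned by this slot
};

// Each slot builds its own draft context from the shared draft model,
// which is why the context created during model init is not needed.
static bool slot_init_draft(Slot & slot, llama_model * model_dft,
                            llama_context_params cparams_dft) {
    // force F16 KV cache for the draft model for extra performance
    cparams_dft.type_k = GGML_TYPE_F16;
    cparams_dft.type_v = GGML_TYPE_F16;

    slot.ctx_dft = llama_new_context_with_model(model_dft, cparams_dft);
    return slot.ctx_dft != nullptr;
}

static void slot_free_draft(Slot & slot) {
    llama_free(slot.ctx_dft); // deleting a null context is a no-op
    slot.ctx_dft = nullptr;
}
```

Because every slot owns a context like this, the shared one returned by model initialization can be released right after load, which is exactly what the added `llama_init_dft.context.reset()` does.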