server : fix speculative decoding with context shift

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-12-03 22:44:19 +02:00
parent cc98896db8
commit a5a915b51e
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -2325,7 +2325,7 @@ struct server_context {
 llama_token id = slot.sampled;
 struct common_speculative_params params_spec;
-params_spec.n_draft = slot.params.speculative.n_max;
+params_spec.n_draft = std::min(slot.params.speculative.n_max, slot.n_ctx - slot.n_past - 1);
 params_spec.n_reuse = llama_n_ctx(slot.ctx_dft) - slot.params.speculative.n_max;
 params_spec.p_min = slot.params.speculative.p_min;