llama : handle KV shift for recurrent models (#10402)

ggml-ci
Georgi Gerganov 2024-11-21 10:22:47 +02:00 committed by GitHub
parent 87a533be57
commit 1bb30bf28c


@@ -18211,13 +18211,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
 static void llama_kv_cache_update_internal(struct llama_context & lctx) {
     bool need_reserve = false;
 
-    // apply K-shift if needed
-    if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
+    if (lctx.kv_self.has_shift) {
         if (!llama_kv_cache_can_shift(&lctx)) {
-            GGML_ABORT("Deepseek2 does not support K-shift");
+            GGML_ABORT("The current context does not support K-shift");
         }
 
-        {
+        // apply K-shift if needed
+        if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
             ggml_backend_sched_reset(lctx.sched.get());
 
             ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
@@ -20463,7 +20463,7 @@ void llama_kv_cache_update(struct llama_context * ctx) {
 }
 
 bool llama_kv_cache_can_shift(struct llama_context * ctx) {
-    return ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
+    return !ctx->kv_self.recurrent && ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
}
 
 // deprecated
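
For context, a minimal caller-side sketch of how the check is intended to be used before shifting positions in the KV cache: with this commit, llama_kv_cache_can_shift() also returns false for recurrent caches (e.g. Mamba/RWKV-style state caches), not only Deepseek2, so callers can fall back instead of hitting the GGML_ABORT above. The helper try_context_shift and its parameters are hypothetical; only llama_kv_cache_can_shift, llama_kv_cache_seq_rm and llama_kv_cache_seq_add are existing llama.h calls.

#include "llama.h"

// Hypothetical helper (not part of this commit): attempt the usual
// "discard the oldest half, slide the rest back" context shift, but only
// when the attached KV cache actually supports position shifting.
static bool try_context_shift(struct llama_context * ctx, int n_past, int n_keep) {
    if (!llama_kv_cache_can_shift(ctx)) {
        return false; // recurrent or MLA cache: caller should re-evaluate instead
    }

    const int n_left    = n_past - n_keep;
    const int n_discard = n_left / 2;

    // remove the oldest tokens past n_keep, then shift the remainder back
    llama_kv_cache_seq_rm (ctx, 0, n_keep,             n_keep + n_discard);
    llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, n_past, -n_discard);

    return true;
}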