diff --git a/llama.cpp b/llama.cpp index 38e593625..0e036e99d 100644 --- a/llama.cpp +++ b/llama.cpp @@ -10702,7 +10702,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { // each move requires 6*n_layer tensors (see build_defrag) // - source view, destination view, copy operation // - x2 for keys and values - const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer); + const uint32_t max_moves = (LLAMA_MAX_NODES - 2*n_layer)/(6*n_layer); // determine which KV cells to move where //