mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 14:20:31 +01:00
parent
8a56075b07
commit
eedd42e376
@ -10702,7 +10702,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
     // each move requires 6*n_layer tensors (see build_defrag)
     //  - source view, destination view, copy operation
     //  - x2 for keys and values
-    const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);
+    const uint32_t max_moves = (LLAMA_MAX_NODES - 2*n_layer)/(6*n_layer);
 
     // determine which KV cells to move where
     //
Loading…
Reference in New Issue
Block a user