mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-26 14:20:31 +01:00)
parent 8a56075b07
commit eedd42e376
@@ -10702,7 +10702,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
     // each move requires 6*n_layer tensors (see build_defrag)
     //   - source view, destination view, copy operation
     //   - x2 for keys and values
-    const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);
+    const uint32_t max_moves = (LLAMA_MAX_NODES - 2*n_layer)/(6*n_layer);
 
     // determine which KV cells to move where
     //
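The new formula reserves 2*n_layer nodes of headroom out of the graph-node budget before dividing the remainder among moves, so the defrag graph stays under the limit. Below is a minimal sketch of that arithmetic, assuming LLAMA_MAX_NODES is 8192 and an 80-layer model; both values are illustrative assumptions, not taken from the commit.

#include <cstdint>
#include <cstdio>

// Illustrative values only -- not taken from the commit.
#define LLAMA_MAX_NODES 8192          // assumed graph-node limit
static const uint32_t n_layer = 80;   // e.g. a large, 80-layer model (assumption)

int main() {
    // before: divide the whole node budget by the 6*n_layer tensors per move
    const uint32_t max_moves_old = LLAMA_MAX_NODES/(6*n_layer);
    // after: subtract 2*n_layer first, leaving that many nodes of headroom
    const uint32_t max_moves_new = (LLAMA_MAX_NODES - 2*n_layer)/(6*n_layer);

    // with these values the old count plus 2*n_layer extra nodes exceeds the
    // limit, while the new count stays within it
    std::printf("old: %u moves -> %u move nodes + %u extra = %u (limit %u)\n",
                max_moves_old, 6*n_layer*max_moves_old, 2*n_layer,
                6*n_layer*max_moves_old + 2*n_layer, (uint32_t) LLAMA_MAX_NODES);
    std::printf("new: %u moves -> %u move nodes + %u reserved = %u (limit %u)\n",
                max_moves_new, 6*n_layer*max_moves_new, 2*n_layer,
                6*n_layer*max_moves_new + 2*n_layer, (uint32_t) LLAMA_MAX_NODES);
    return 0;
}

With these assumed numbers the old formula allows 17 moves (8160 + 160 = 8320 nodes, over the 8192 limit), while the new one allows 16 moves (7680 + 160 = 7840 nodes, within the limit).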