mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 14:20:31 +01:00
llama : print a log of the total cache size
This commit is contained in:
parent
1494a1841e
commit
8a8f8b953f
25
llama.cpp
25
llama.cpp
@ -4842,21 +4842,28 @@ static void llm_load_vocab(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: special tokens cache size = %u.\n", __func__, (uint32_t)vocab.cache_special_tokens.size());
|
LLAMA_LOG_INFO("%s: special tokens cache size = %u\n", __func__, (uint32_t)vocab.cache_special_tokens.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// build token to piece caches
|
// build token to piece caches
|
||||||
{
|
{
|
||||||
std::vector<llama_vocab::token> cache_token_to_piece (n_vocab);
|
size_t size_cache = 0;
|
||||||
std::vector<llama_vocab::token> cache_token_to_piece_special(n_vocab);
|
|
||||||
|
|
||||||
for (uint32_t id = 0; id < n_vocab; ++id) {
|
std::vector<llama_vocab::token> cache_token_to_piece (n_vocab);
|
||||||
cache_token_to_piece[id] = llama_token_to_piece(&model, id, false);
|
std::vector<llama_vocab::token> cache_token_to_piece_special(n_vocab);
|
||||||
cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::swap(vocab.cache_token_to_piece, cache_token_to_piece);
|
for (uint32_t id = 0; id < n_vocab; ++id) {
|
||||||
std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
|
cache_token_to_piece[id] = llama_token_to_piece(&model, id, false);
|
||||||
|
cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
|
||||||
|
|
||||||
|
size_cache += cache_token_to_piece[id].size();
|
||||||
|
size_cache += cache_token_to_piece_special[id].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::swap(vocab.cache_token_to_piece, cache_token_to_piece);
|
||||||
|
std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
|
||||||
|
|
||||||
|
LLAMA_LOG_INFO("%s: token to piece cache size = %.4f MB\n", __func__, size_cache / 1024.0 / 1024.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user