From 8a8f8b953f6d21c2be62fb0e8f8c509d58b8c6ca Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 29 May 2024 21:44:55 +0300
Subject: [PATCH] llama : print a log of the total cache size

---
 llama.cpp | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index a94d37a31..40d2ec2c9 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4842,21 +4842,28 @@ static void llm_load_vocab(
             }
         );
 
-        LLAMA_LOG_INFO("%s: special tokens cache size = %u.\n", __func__, (uint32_t)vocab.cache_special_tokens.size());
+        LLAMA_LOG_INFO("%s: special tokens cache size = %u\n", __func__, (uint32_t)vocab.cache_special_tokens.size());
     }
 
     // build token to piece caches
     {
-        std::vector<std::string> cache_token_to_piece        (n_vocab);
-        std::vector<std::string> cache_token_to_piece_special(n_vocab);
+        size_t size_cache = 0;
 
-        for (uint32_t id = 0; id < n_vocab; ++id) {
-            cache_token_to_piece[id]         = llama_token_to_piece(&model, id, false);
-            cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
-        }
+        std::vector<std::string> cache_token_to_piece        (n_vocab);
+        std::vector<std::string> cache_token_to_piece_special(n_vocab);
 
-        std::swap(vocab.cache_token_to_piece,         cache_token_to_piece);
-        std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
+        for (uint32_t id = 0; id < n_vocab; ++id) {
+            cache_token_to_piece[id]         = llama_token_to_piece(&model, id, false);
+            cache_token_to_piece_special[id] = llama_token_to_piece(&model, id, true);
+
+            size_cache += cache_token_to_piece[id].size();
+            size_cache += cache_token_to_piece_special[id].size();
+        }
+
+        std::swap(vocab.cache_token_to_piece,         cache_token_to_piece);
+        std::swap(vocab.cache_token_to_piece_special, cache_token_to_piece_special);
+
+        LLAMA_LOG_INFO("%s: token to piece cache size = %.4f MB\n", __func__, size_cache / 1024.0 / 1024.0);
     }
 }
 
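Note: for readers outside the llama.cpp tree, below is a minimal standalone sketch of the accounting this patch introduces: fill two piece caches, sum the byte length of every cached string as it is produced, and report the total in MB using the same bytes / 1024 / 1024 conversion. The token_to_piece() helper and the n_vocab value here are hypothetical stand-ins for llama_token_to_piece() and the model's real vocabulary size; only the size-accounting pattern mirrors the patch.

// sketch.cpp -- standalone illustration, not llama.cpp code
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// hypothetical stand-in for llama_token_to_piece(&model, id, special)
static std::string token_to_piece(uint32_t id, bool special) {
    return special ? "<tok" + std::to_string(id) + ">"
                   : "tok"  + std::to_string(id);
}

int main() {
    const uint32_t n_vocab = 32000; // hypothetical vocabulary size

    std::vector<std::string> cache_token_to_piece        (n_vocab);
    std::vector<std::string> cache_token_to_piece_special(n_vocab);

    size_t size_cache = 0; // total bytes held by both caches

    for (uint32_t id = 0; id < n_vocab; ++id) {
        cache_token_to_piece[id]         = token_to_piece(id, false);
        cache_token_to_piece_special[id] = token_to_piece(id, true);

        // count only the string payloads, as the patch does
        // (vector/string bookkeeping overhead is not included)
        size_cache += cache_token_to_piece[id].size();
        size_cache += cache_token_to_piece_special[id].size();
    }

    // same unit conversion as the patch: bytes -> MB
    printf("token to piece cache size = %.4f MB\n", size_cache / 1024.0 / 1024.0);
    return 0;
}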