Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-24 10:29:21 +01:00)
llama : do not print "offloading layers" message in CPU-only builds (#5416)
This commit is contained in:
parent: 6e99f2a04f
commit: 41f308f58e
10
llama.cpp
10
llama.cpp
@@ -4209,8 +4209,7 @@ static bool llm_load_tensors(
|
|||||||
ctx_bufs.emplace_back(ctx, buf);
|
ctx_bufs.emplace_back(ctx, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
// print memory requirements
|
if (llama_supports_gpu_offload()) {
|
||||||
{
|
|
||||||
const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
|
const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
|
LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
|
||||||
@@ -4222,10 +4221,11 @@ static bool llm_load_tensors(
|
|||||||
const int max_offloadable_layers = hparams.n_layer + 1;
|
const int max_offloadable_layers = hparams.n_layer + 1;
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
|
LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
|
||||||
|
}
|
||||||
|
|
||||||
for (ggml_backend_buffer_t buf : model.bufs) {
|
// print memory requirements
|
||||||
LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
|
for (ggml_backend_buffer_t buf : model.bufs) {
|
||||||
}
|
LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// populate tensors_by_name
|
// populate tensors_by_name
|
||||||
|
Loading…
Reference in New Issue
Block a user