From 39799adc4739c769e057ce253d31dbd08b0695c6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 11:49:44 -0800
Subject: [PATCH] Add a helpful error message when llama.cpp fails to load the model

---
 modules/llamacpp_hf.py    | 14 +++++++++++++-
 modules/llamacpp_model.py | 14 +++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index f9964fe8..b3761e0f 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
             params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         Llama = llama_cpp_lib().Llama
-        model = Llama(**params)
+        try:
+            model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else ''}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         model.last_updated_index = -1
 
         return LlamacppHF(model, model_file)
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index c79755e4..db25c66c 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -108,7 +108,19 @@ class LlamaCppModel:
             params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
             params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
-        result.model = Llama(**params)
+        try:
+            result.model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else ''}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
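
Outside the patch itself, the re-raise pattern both hunks rely on can be exercised in isolation. The sketch below is a minimal, standalone illustration (not part of the patch and under stated assumptions); `load_model`, `load_with_hint`, and their parameters are hypothetical placeholders, not the project's actual loader API.

    # Minimal sketch of the pattern used in both hunks above: catch the loader
    # failure and re-raise the same exception type with a more actionable
    # message, chaining the original exception via `from e`.
    # NOTE: `load_model` and its keyword arguments are hypothetical placeholders.

    def load_with_hint(load_model, n_ctx, n_gpu_layers):
        try:
            return load_model(n_ctx=n_ctx, n_gpu_layers=n_gpu_layers)
        except Exception as e:
            hint = (
                "Failed loading the model. This usually happens due to lack of memory. "
                f"Try reducing n_ctx (currently {n_ctx}) "
                f"or lowering n-gpu-layers (currently {n_gpu_layers})."
            )
            # Re-raising with type(e) keeps the original exception class, so
            # callers that catch a specific error type still work; `from e`
            # preserves the original exception as the cause for debugging.
            raise type(e)(hint) from e

    if __name__ == "__main__":
        def failing_loader(**kwargs):
            raise MemoryError("insufficient memory for KV cache")

        try:
            load_with_hint(failing_loader, n_ctx=8192, n_gpu_layers=33)
        except MemoryError as err:
            print(err)  # prints the hint; the original error is err.__cause__

One caveat of this approach, shared with the patch: constructing `type(e)(message)` assumes the exception class accepts a single string argument, which holds for most built-in exceptions but not necessarily for every custom exception type.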