Add a helpful error message when llama.cpp fails to load the model
parent 079ace63ec
commit 39799adc47
@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
         params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         Llama = llama_cpp_lib().Llama
-        model = Llama(**params)
+        try:
+            model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         model.last_updated_index = -1
 
         return LlamacppHF(model, model_file)
@@ -108,7 +108,19 @@ class LlamaCppModel:
         params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
         params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
-        result.model = Llama(**params)
+        try:
+            result.model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
 
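Both hunks apply the same idiom: catch the constructor failure, re-raise an exception of the same type with a clearer message, and chain the original error. A minimal, self-contained sketch of that idiom follows; the names FakeLlama and load_model are illustrative stand-ins, not code from this repository.

# Minimal sketch of the re-raise idiom used in this commit.
# FakeLlama and load_model are illustrative stand-ins, not part of text-generation-webui.

class FakeLlama:
    def __init__(self, **kwargs):
        # Simulate the allocation failure that Llama(**params) can hit.
        raise MemoryError("failed to allocate KV cache")


def load_model(params):
    try:
        return FakeLlama(**params)
    except Exception as e:
        error_message = (
            "Failed loading the model. **This usually happens due to lack of memory**. "
            "Try reducing `n_ctx` or `n-gpu-layers`."
        )
        # Keep the original exception type so existing `except` clauses still match,
        # and chain with `from e` so the original traceback is preserved.
        raise type(e)(error_message) from e


if __name__ == "__main__":
    try:
        load_model({"n_ctx": 32768, "n_gpu_layers": 40})
    except Exception as e:
        print(f"{type(e).__name__}: {e}")          # MemoryError: Failed loading the model...
        print(f"caused by: {e.__cause__!r}")       # original MemoryError, still attached

Because the wrapper re-raises with type(e) rather than a fixed exception class, any caller that already handles a specific error type keeps working, while the user sees the actionable message.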