Add a helpful error message when llama.cpp fails to load the model

oobabooga 2025-01-21 11:49:44 -08:00
parent 079ace63ec
commit 39799adc47
2 changed files with 26 additions and 2 deletions

modules/llamacpp_hf.py

@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
             params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         Llama = llama_cpp_lib().Llama
-        model = Llama(**params)
+
+        try:
+            model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+            raise type(e)(error_message) from e
+
         model.last_updated_index = -1
 
         return LlamacppHF(model, model_file)

modules/llamacpp_model.py

@@ -108,7 +108,19 @@ class LlamaCppModel:
             params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
             params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
-        result.model = Llama(**params)
+
+        try:
+            result.model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+            raise type(e)(error_message) from e
+
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
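
Both hunks rely on the same idiom, `raise type(e)(error_message) from e`: the re-raised exception keeps its original class, so existing handlers such as `except MemoryError:` still match, while `from e` chains the low-level failure as `__cause__` so the full traceback survives. Below is a minimal, runnable sketch of that pattern, not code from this commit; `load_model()` and its parameters are hypothetical stand-ins, and the diff's `else '.'` branch is replaced with `else ''`, since the original would print a doubled period whenever `n_ctx` is already 4096 or lower.

```python
# Minimal sketch of the re-raise idiom used in this commit.
# load_model() and its defaults are illustrative stand-ins, not the real loader.

def load_model(n_ctx: int = 8192, n_gpu_layers: int = 33):
    try:
        # Stand-in for Llama(**params) failing, e.g. on an allocation error.
        raise MemoryError("failed to allocate buffer for KV cache")
    except Exception as e:
        error_message = (
            "Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
            f"1. Reduce the context length `n_ctx` (currently {n_ctx})."
            f"{' Try a lower value like 4096.' if n_ctx > 4096 else ''}"
            "\n"
            f"2. Lower the `n-gpu-layers` value (currently {n_gpu_layers})."
        )
        # Same exception class, friendlier message, original error chained.
        raise type(e)(error_message) from e


try:
    load_model(n_ctx=8192, n_gpu_layers=33)
except MemoryError as e:                    # the original class still matches
    print(e)                                # actionable message for the user
    print("Caused by:", repr(e.__cause__))  # underlying llama.cpp failure
```

One caveat of the idiom: `type(e)(error_message)` assumes the exception class can be constructed from a single string argument. For classes with mandatory extra constructor arguments (`UnicodeDecodeError`, for example) the re-raise would itself fail with a `TypeError`, so falling back to a generic `RuntimeError` is a common hardening step.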