Add a helpful error message when llama.cpp fails to load the model
parent 079ace63ec
commit 39799adc47
@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
         params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         Llama = llama_cpp_lib().Llama
-        model = Llama(**params)
+        try:
+            model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         model.last_updated_index = -1
 
         return LlamacppHF(model, model_file)
@@ -108,7 +108,19 @@ class LlamaCppModel:
         params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
         params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
-        result.model = Llama(**params)
+        try:
+            result.model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
 
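Both hunks apply the same idiom: catch the constructor failure, re-raise an exception of the same type with a clearer message, and chain the original error. A minimal, self-contained sketch of that idiom follows; the names FakeLlama and load_model are illustrative stand-ins, not code from this repository.

# Minimal sketch of the re-raise idiom used in this commit.
# FakeLlama and load_model are illustrative stand-ins, not part of text-generation-webui.

class FakeLlama:
    def __init__(self, **kwargs):
        # Simulate the allocation failure that Llama(**params) can hit.
        raise MemoryError("failed to allocate KV cache")


def load_model(params):
    try:
        return FakeLlama(**params)
    except Exception as e:
        error_message = (
            "Failed loading the model. **This usually happens due to lack of memory**. "
            "Try reducing `n_ctx` or `n-gpu-layers`."
        )
        # Keep the original exception type so existing `except` clauses still match,
        # and chain with `from e` so the original traceback is preserved.
        raise type(e)(error_message) from e


if __name__ == "__main__":
    try:
        load_model({"n_ctx": 32768, "n_gpu_layers": 40})
    except Exception as e:
        print(f"{type(e).__name__}: {e}")          # MemoryError: Failed loading the model...
        print(f"caused by: {e.__cause__!r}")       # original MemoryError, still attached

Because the wrapper re-raises with type(e) rather than a fixed exception class, any caller that already handles a specific error type keeps working, while the user sees the actionable message.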