Mirror of https://github.com/oobabooga/text-generation-webui.git
Load llamacpp before quantized model (#1307)

commit 07de7d0426 (parent 3961f49524)
@@ -99,6 +99,16 @@ def load_model(model_name):
 
         return model, tokenizer
 
+    # llamacpp model
+    elif shared.is_llamacpp:
+        from modules.llamacpp_model_alternative import LlamaCppModel
+
+        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
+        print(f"llama.cpp weights detected: {model_file}\n")
+
+        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+        return model, tokenizer
+
     # Quantized model
     elif shared.args.wbits > 0:
 
@@ -116,16 +126,6 @@ def load_model(model_name):
         model = load_quantized(model_name)
 
-    # llamacpp model
-    elif shared.is_llamacpp:
-        from modules.llamacpp_model_alternative import LlamaCppModel
-
-        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
-        print(f"llama.cpp weights detected: {model_file}\n")
-
-        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
-        return model, tokenizer
-
     # Custom
     else:
         params = {"low_cpu_mem_usage": True}
 
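In effect, the commit moves the shared.is_llamacpp branch ahead of the shared.args.wbits check in load_model, so GGML weights are handed to llama.cpp even when --wbits is set. Below is a minimal sketch of the resulting dispatch order; pick_loader is a hypothetical helper written for illustration, while is_llamacpp, --wbits, and the branch comments come from the diff above.

    def pick_loader(is_llamacpp: bool, wbits: int) -> str:
        """Hypothetical helper mirroring load_model()'s branch order after this commit."""
        # llamacpp model -- now checked first
        if is_llamacpp:
            return 'llamacpp'
        # Quantized model -- ran first before this commit, so GGML weights
        # combined with --wbits > 0 were routed to the GPTQ loader instead
        elif wbits > 0:
            return 'quantized'
        # Custom (the default Hugging Face path)
        else:
            return 'custom'

    # GGML weights with --wbits 4 now reach llama.cpp rather than load_quantized():
    assert pick_loader(is_llamacpp=True, wbits=4) == 'llamacpp'

With the old ordering, passing --wbits made the quantized branch claim the model before the llama.cpp check ever ran; checking is_llamacpp first removes that conflict.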