Load llamacpp before quantized model (#1307)

catalpaaa 2023-04-17 06:47:26 -07:00 committed by GitHub
parent 3961f49524
commit 07de7d0426


@@ -99,6 +99,16 @@ def load_model(model_name):
         return model, tokenizer

+    # llamacpp model
+    elif shared.is_llamacpp:
+        from modules.llamacpp_model_alternative import LlamaCppModel
+
+        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
+        print(f"llama.cpp weights detected: {model_file}\n")
+
+        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+        return model, tokenizer
+
     # Quantized model
     elif shared.args.wbits > 0:
@@ -116,16 +126,6 @@ def load_model(model_name):
         model = load_quantized(model_name)

-    # llamacpp model
-    elif shared.is_llamacpp:
-        from modules.llamacpp_model_alternative import LlamaCppModel
-
-        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
-        print(f"llama.cpp weights detected: {model_file}\n")
-
-        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
-        return model, tokenizer
-
     # Custom
     else:
         params = {"low_cpu_mem_usage": True}
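
The change matters because load_model() returns from the first branch that matches: with the llamacpp block placed after the quantized block, a ggml model started with --wbits set would fall into the GPTQ path instead of llama.cpp. Below is a minimal, hypothetical sketch of the branch order after this commit (simplified; the real modules/models.py contains additional setup and the full transformers/custom paths):

# Hypothetical, simplified sketch of load_model() branch order after this commit.
from pathlib import Path

import modules.shared as shared


def load_model(model_name):
    # llama.cpp (ggml) weights are now checked before the quantized path,
    # so a ggml model loads via LlamaCppModel even if --wbits is also set.
    if shared.is_llamacpp:
        from modules.llamacpp_model_alternative import LlamaCppModel

        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
        print(f"llama.cpp weights detected: {model_file}\n")
        return LlamaCppModel.from_pretrained(model_file)  # (model, tokenizer)

    # The GPTQ/quantized branch now only sees non-ggml models.
    if shared.args.wbits > 0:
        from modules.GPTQ_loader import load_quantized

        model = load_quantized(model_name)
        # ... tokenizer loading as in the original function ...
        return model, None

    # Custom / default transformers path follows in the real function.
    raise NotImplementedError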