Fix safetensors kwarg usage in AutoAWQ

This commit is contained in:
oobabooga 2023-10-10 19:03:09 -07:00
parent 39f16ff83d
commit f63361568c

View File

@@ -277,14 +277,12 @@ def ctransformers_loader(model_name):
     model, tokenizer = ctrans.from_pretrained(model_file)
     return model, tokenizer

 def AutoAWQ_loader(model_name):
     from awq import AutoAWQForCausalLM
     model_dir = Path(f'{shared.args.model_dir}/{model_name}')
-    if shared.args.deepspeed:
-        logger.warn("AutoAWQ is incompatible with deepspeed")
     model = AutoAWQForCausalLM.from_quantized(
         quant_path=model_dir,
         max_new_tokens=shared.args.max_seq_len,
@@ -292,10 +290,12 @@ def AutoAWQ_loader(model_name):
         fuse_layers=not shared.args.no_inject_fused_attention,
         max_memory=get_max_memory_dict(),
         batch_size=shared.args.n_batch,
-        safetensors=not shared.args.trust_remote_code)
+        safetensors=any(model_dir.glob('*.safetensors')),
+    )
     return model

 def GPTQ_loader(model_name):
     # Monkey patch