From 308452b7837f5310d1295dcd6feba5530de4fe50 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 4 Apr 2024 18:09:34 -0700
Subject: [PATCH] Bitsandbytes: load preconverted 4bit models without
 additional flags

---
 modules/models.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/modules/models.py b/modules/models.py
index 98349705..cccf1d2d 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -145,17 +145,18 @@ def huggingface_loader(model_name):
     else:
         LoaderClass = AutoModelForCausalLM
 
-    # Load the model in simple 16-bit mode by default
+    # Load the model without any special settings
     if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.compress_pos_emb > 1, shared.args.alpha_value > 1, shared.args.disable_exllama, shared.args.disable_exllamav2]):
         model = LoaderClass.from_pretrained(path_to_model, **params)
-        if torch.backends.mps.is_available():
-            device = torch.device('mps')
-            model = model.to(device)
-        elif is_xpu_available():
-            device = torch.device("xpu")
-            model = model.to(device)
-        else:
-            model = model.cuda()
+        if not (hasattr(model, 'is_loaded_in_4bit') and model.is_loaded_in_4bit):
+            if torch.backends.mps.is_available():
+                device = torch.device('mps')
+                model = model.to(device)
+            elif is_xpu_available():
+                device = torch.device("xpu")
+                model = model.to(device)
+            else:
+                model = model.cuda()
 
     # DeepSpeed ZeRO-3
     elif shared.args.deepspeed:
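
Context for the new guard, as a minimal standalone sketch (not part of the patch): when a checkpoint's config.json already carries a bitsandbytes quantization_config, transformers returns it from from_pretrained() already quantized, with is_loaded_in_4bit set, and already dispatched to the GPU. Calling .cuda() or .to(device) on such a model raises a ValueError, which is why the patch skips device placement in that case. The model path below is hypothetical.

# Minimal sketch, assuming transformers + bitsandbytes are installed and
# "user/llama-bnb-4bit" (hypothetical) was saved with a bitsandbytes
# quantization_config in its config.json.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("user/llama-bnb-4bit")

# transformers sets is_loaded_in_4bit on bitsandbytes-quantized models; these
# are placed on the GPU during loading, and .cuda()/.to(device) raise a
# ValueError on them, so manual device placement must be skipped.
if not (hasattr(model, 'is_loaded_in_4bit') and model.is_loaded_in_4bit):
    model = model.cuda()  # plain fp16/fp32 checkpoints still need moving

This is the same check the patch adds inside huggingface_loader(), which is what lets preconverted 4-bit models load without passing --load-in-4bit or other flags.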