diff --git a/modules/models.py b/modules/models.py index 98349705..cccf1d2d 100644 --- a/modules/models.py +++ b/modules/models.py @@ -145,17 +145,18 @@ def huggingface_loader(model_name): else: LoaderClass = AutoModelForCausalLM - # Load the model in simple 16-bit mode by default + # Load the model without any special settings if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.compress_pos_emb > 1, shared.args.alpha_value > 1, shared.args.disable_exllama, shared.args.disable_exllamav2]): model = LoaderClass.from_pretrained(path_to_model, **params) - if torch.backends.mps.is_available(): - device = torch.device('mps') - model = model.to(device) - elif is_xpu_available(): - device = torch.device("xpu") - model = model.to(device) - else: - model = model.cuda() + if not (hasattr(model, 'is_loaded_in_4bit') and model.is_loaded_in_4bit): + if torch.backends.mps.is_available(): + device = torch.device('mps') + model = model.to(device) + elif is_xpu_available(): + device = torch.device("xpu") + model = model.to(device) + else: + model = model.cuda() # DeepSpeed ZeRO-3 elif shared.args.deepspeed: