diff --git a/modules/LoRA.py b/modules/LoRA.py
index 0eb56b56..2eade078 100644
--- a/modules/LoRA.py
+++ b/modules/LoRA.py
@@ -9,9 +9,9 @@ from modules.models import reload_model
 
 
 def add_lora_to_model(lora_names):
-    if 'GPTQForCausalLM' in shared.model.__class__.__name__:
+    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
         add_lora_autogptq(lora_names)
-    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF']:
+    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
         add_lora_exllama(lora_names)
     else:
         add_lora_transformers(lora_names)
@@ -66,15 +66,16 @@ def add_lora_autogptq(lora_names):
         logger.error("This version of AutoGPTQ does not support LoRA. You need to install from source or wait for a new release.")
         return
 
-    if len(lora_names) == 0:
-        if len(shared.lora_names) > 0:
-            reload_model()
+    if len(lora_names) == 0:
+        reload_model()
 
         shared.lora_names = []
         return
     else:
         if len(lora_names) > 1:
             logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+        if not shared.args.no_inject_fused_attention:
+            logger.warning('Fused Attention + AutoGPTQ may break LoRA loading. Disable it.')
 
     peft_config = GPTQLoraConfig(
         inference_mode=True,
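
For reference, a minimal sketch of the dispatch behavior this change enables. It is illustrative only: 'my-lora' is a placeholder adapter name, a model is assumed to already be loaded by the web UI, and shared.args.loader is set directly here in place of whatever loader selection the UI or CLI made.

import modules.shared as shared
from modules.LoRA import add_lora_to_model

# Before this change, routing relied only on the model's class name.
# Now, even if the class name does not contain 'GPTQForCausalLM',
# an explicit AutoGPTQ loader selection routes to add_lora_autogptq().
shared.args.loader = 'AutoGPTQ'
add_lora_to_model(['my-lora'])

# Likewise, an ExLlama loader selection routes to add_lora_exllama()
# even when the model class is not ExllamaModel/ExllamaHF.
shared.args.loader = 'ExLlama'
add_lora_to_model(['my-lora'])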