Mirror of https://github.com/oobabooga/text-generation-webui.git
Lora fixes for AutoGPTQ (#2818)

Commit: 74ea7522a0
Parent: 70b088843d
modules/LoRA.py:

@@ -9,9 +9,9 @@ from modules.models import reload_model
 
 
 def add_lora_to_model(lora_names):
-    if 'GPTQForCausalLM' in shared.model.__class__.__name__:
+    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
         add_lora_autogptq(lora_names)
-    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF']:
+    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
         add_lora_exllama(lora_names)
     else:
         add_lora_transformers(lora_names)
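This first hunk fixes the dispatch in add_lora_to_model: the choice of LoRA backend no longer depends only on the model wrapper's class name but also honors the loader the user explicitly selected (shared.args.loader), so the AutoGPTQ and ExLlama paths are taken even when the class-name heuristic misses. Assembled from the hunk above, the function after this commit reads:

    def add_lora_to_model(lora_names):
        # Class-name heuristic, with a fallback on the explicitly selected loader
        if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
            add_lora_autogptq(lora_names)
        elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
            add_lora_exllama(lora_names)
        else:
            # Everything else goes through the standard Transformers/PEFT path
            add_lora_transformers(lora_names)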
@@ -67,7 +67,6 @@ def add_lora_autogptq(lora_names):
         return
 
     if len(lora_names) == 0:
-        if len(shared.lora_names) > 0:
             reload_model()
 
             shared.lora_names = []
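The second hunk simplifies the unload path: with the len(shared.lora_names) > 0 guard removed, passing an empty list now always reloads the base model before clearing the adapter list, instead of reloading only when a LoRA was previously attached. A minimal usage sketch:

    # Unloading all LoRAs under AutoGPTQ: after this change, the base model is
    # reloaded from scratch and the adapter bookkeeping list is cleared.
    add_lora_to_model([])  # leaves shared.lora_names == []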
@@ -75,6 +74,8 @@ def add_lora_autogptq(lora_names):
     else:
         if len(lora_names) > 1:
             logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+        if not shared.args.no_inject_fused_attention:
+            logger.warning('Fused Atttention + AutoGPTQ may break Lora loading. Disable it.')
 
         peft_config = GPTQLoraConfig(
             inference_mode=True,
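The final hunk only adds a second warning: AutoGPTQ's fused attention injection is active unless --no_inject_fused_attention is passed, and it can break LoRA loading, so the user is advised to disable it. The diff is truncated inside the GPTQLoraConfig(...) call; the sketch below shows how the branch plausibly continues, based on AutoGPTQ's auto_gptq.utils.peft_utils API. The get_gptq_peft_model call and the adapter path are illustrative assumptions, not lines from this commit:

    from auto_gptq.utils.peft_utils import GPTQLoraConfig, get_gptq_peft_model

    peft_config = GPTQLoraConfig(
        inference_mode=True,  # the adapter is loaded for inference, not training
    )

    # Assumption: wrap the quantized model with the first (and only) adapter;
    # 'loras/...' is a hypothetical adapter directory, not taken from the diff.
    shared.model = get_gptq_peft_model(shared.model, peft_config, model_id=f"loras/{lora_names[0]}")
    shared.lora_names = [lora_names[0]]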