Add ExLlama+LoRA support (#2756)

2024-12-04 13:00:30 +01:00 · 2023-06-19 12:31:24 -03:00 · 2023-06-19 12:31:24 -03:00 · eb30f4441f
commit eb30f4441f
parent a1cac88c19
3 changed files with 119 additions and 73 deletions
--- a/modules/LoRA.py
+++ b/modules/LoRA.py
@ -7,49 +7,79 @@ import modules.shared as shared
 from modules.logging_colors import logger
 from modules.models import reload_model
 def add_lora_to_model(lora_names):
     """Route ``lora_names`` to the LoRA loader matching the loaded model.

     The loader is chosen from the class name of ``shared.model``: names
     containing ``GPTQForCausalLM`` use the AutoGPTQ loader, the exact name
     ``ExllamaModel`` uses the ExLlama loader, and anything else falls back
     to the Transformers loader.
     """
     model_class_name = shared.model.__class__.__name__

     if 'GPTQForCausalLM' in model_class_name:
         loader = add_lora_autogptq
     elif model_class_name == 'ExllamaModel':
         loader = add_lora_exllama
     else:
         loader = add_lora_transformers

     loader(lora_names)
 def add_lora_exllama(lora_names):
     """Attach one LoRA to the loaded ExLlama model, or detach the current one.

     Args:
         lora_names: Names of LoRA folders located under
             ``shared.args.lora_dir``. An empty (or ``None``) selection clears
             the generator's LoRA. Only the first name is applied; a warning
             is logged when more than one is supplied.

     The adapter is read from ``adapter_config.json`` and
     ``adapter_model.bin`` inside the LoRA folder. ``shared.lora_names`` is
     updated to reflect what is actually applied.

     If ``ExLlamaLora`` cannot be imported, an error is logged and the
     function returns without touching the model.
     """
     try:
         from repositories.exllama.lora import ExLlamaLora
     except Exception:
         # ``Exception`` rather than a bare ``except`` so that Ctrl+C and
         # SystemExit raised during the import are not silently swallowed.
         logger.error("Could not find the file repositories/exllama/lora.py. Make sure that exllama is cloned inside repositories/ and is up to date.")
         return

     # Nothing selected: detach whatever LoRA the generator currently holds.
     if not lora_names:
         shared.model.generator.lora = None
         shared.lora_names = []
         return

     if len(lora_names) > 1:
         logger.warning('ExLlama can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')

     lora_name = lora_names[0]
     lora_path = Path(f"{shared.args.lora_dir}/{lora_name}")
     lora_config_path = lora_path / "adapter_config.json"
     lora_adapter_path = lora_path / "adapter_model.bin"

     logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, lora_name))
     shared.model.generator.lora = ExLlamaLora(shared.model.model, str(lora_config_path), str(lora_adapter_path))
     shared.lora_names = [lora_name]
 # Adapted from https://github.com/Ph0rk0z/text-generation-webui-testing
 def add_lora_autogptq(lora_names):
    try:
        from auto_gptq import get_gptq_peft_model
        from auto_gptq.utils.peft_utils import GPTQLoraConfig
    has_auto_gptq_peft = True
    except:
    has_auto_gptq_peft = False
 def add_lora_to_model(lora_names):
    prior_set = set(shared.lora_names)
    added_set = set(lora_names) - prior_set
    removed_set = prior_set - set(lora_names)
    shared.lora_names = list(lora_names)
    is_autogptq = 'GPTQForCausalLM' in shared.model.__class__.__name__
    # AutoGPTQ case. It doesn't use the peft functions.
    # Copied from https://github.com/Ph0rk0z/text-generation-webui-testing
    if is_autogptq:
        if not has_auto_gptq_peft:
        logger.error("This version of AutoGPTQ does not support LoRA. You need to install from source or wait for a new release.")
        return
-        if len(prior_set) > 0:
+    if len(lora_names) == 0:
        if len(shared.lora_names) > 0:
            reload_model()
-        if len(shared.lora_names) == 0:
+        shared.lora_names = []
        return
    else:
-            if len(shared.lora_names) > 1:
+        if len(lora_names) > 1:
-                logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded')
+            logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
        peft_config = GPTQLoraConfig(
            inference_mode=True,
        )
-            lora_path = Path(f"{shared.args.lora_dir}/{shared.lora_names[0]}")
+        lora_path = Path(f"{shared.args.lora_dir}/{lora_names[0]}")
        logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join([lora_names[0]])))
        shared.model = get_gptq_peft_model(shared.model, peft_config, lora_path)
        shared.lora_names = [lora_names[0]]
        return
-    # Transformers case
+
-    else:
+def add_lora_transformers(lora_names):
    prior_set = set(shared.lora_names)
    added_set = set(lora_names) - prior_set
    removed_set = prior_set - set(lora_names)
    # If no LoRA needs to be added or removed, exit
    if len(added_set) == 0 and len(removed_set) == 0:
        return
@ -68,7 +98,6 @@ def add_lora_to_model(lora_names):
        shared.model = shared.model.base_model.model
    if len(lora_names) > 0:
            logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
        params = {}
        if not shared.args.cpu:
            params['dtype'] = shared.model.dtype
@ -77,10 +106,13 @@ def add_lora_to_model(lora_names):
            elif shared.args.load_in_8bit:
                params['device_map'] = {'': 0}
        logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
        shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), adapter_name=lora_names[0], **params)
        for lora in lora_names[1:]:
            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
        shared.lora_names = lora_names
        if not shared.args.load_in_8bit and not shared.args.cpu:
            shared.model.half()
            if not hasattr(shared.model, "hf_device_map"):
--- a/modules/exllama.py
+++ b/modules/exllama.py
@ -3,11 +3,12 @@ from pathlib import Path
 from modules import shared
 from modules.logging_colors import logger
 from modules.relative_imports import RelativeImport
-sys.path.insert(0, str(Path("repositories/exllama")))
+with RelativeImport("repositories/exllama"):
-from repositories.exllama.generator import ExLlamaGenerator
+    from generator import ExLlamaGenerator
-from repositories.exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig
+    from model import ExLlama, ExLlamaCache, ExLlamaConfig
-from repositories.exllama.tokenizer import ExLlamaTokenizer
+    from tokenizer import ExLlamaTokenizer
 class ExllamaModel:
--- a/modules/relative_imports.py
+++ b/modules/relative_imports.py
@ -0,0 +1,13 @@
 import sys
 from pathlib import Path
 class RelativeImport:
     """Context manager that temporarily puts a directory first on ``sys.path``.

     On entry the directory is inserted at index 0 of ``sys.path`` so that
     imports inside the ``with`` block resolve against it first; on exit the
     entry is removed again.

     Args:
         path: Directory to expose for imports. It is stored as a
             ``pathlib.Path`` in ``import_path``.
     """

     def __init__(self, path):
         self.import_path = Path(path)

     def __enter__(self):
         sys.path.insert(0, str(self.import_path))
         # Returning the instance makes ``with RelativeImport(p) as ctx`` usable.
         return self

     def __exit__(self, exc_type, exc_value, traceback):
         try:
             sys.path.remove(str(self.import_path))
         except ValueError:
             # Code inside the block already removed the entry. Raising here
             # would mask any exception propagating out of the block.
             pass