Lora fixes for AutoGPTQ (#2818)

Forkoz authored 2023-07-09 04:03:43 +00:00 (committed by GitHub)
parent 70b088843d
commit 74ea7522a0


@@ -9,9 +9,9 @@ from modules.models import reload_model
 def add_lora_to_model(lora_names):
-    if 'GPTQForCausalLM' in shared.model.__class__.__name__:
+    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
         add_lora_autogptq(lora_names)
-    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF']:
+    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
         add_lora_exllama(lora_names)
     else:
         add_lora_transformers(lora_names)
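
To make the intent of the first hunk easier to see outside the +/- view, here is a minimal, self-contained sketch of the dispatch logic as it reads after the patch. Only the body of add_lora_to_model mirrors the diff above; the _Args and _Shared classes and the stubbed add_lora_* helpers are placeholder scaffolding introduced here for illustration, standing in for modules.shared and the loader-specific functions defined elsewhere in this module.

# Sketch of the post-patch dispatch; placeholders stand in for the real webui objects.
class _Args:
    loader = 'AutoGPTQ'  # stands in for however shared.args.loader is populated


class _Shared:
    args = _Args()
    model = object()  # placeholder whose class name matches none of the checks


shared = _Shared()


def add_lora_autogptq(lora_names):
    print('AutoGPTQ LoRA path:', lora_names)


def add_lora_exllama(lora_names):
    print('ExLlama LoRA path:', lora_names)


def add_lora_transformers(lora_names):
    print('Transformers/PEFT LoRA path:', lora_names)


def add_lora_to_model(lora_names):
    # New behaviour: route on either the wrapped model's class name or the
    # explicitly selected loader, so AutoGPTQ/ExLlama models are handled even
    # when the class-name check alone would not match.
    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
        add_lora_autogptq(lora_names)
    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
        add_lora_exllama(lora_names)
    else:
        add_lora_transformers(lora_names)


add_lora_to_model(['my-lora'])  # prints: AutoGPTQ LoRA path: ['my-lora']

With loader set to 'ExLlama' (or anything else) in the placeholder _Args, the same call would route to the ExLlama or transformers path instead, which is the behaviour the added shared.args.loader checks provide.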
@@ -67,14 +67,15 @@ def add_lora_autogptq(lora_names):
         return
 
     if len(lora_names) == 0:
-        if len(shared.lora_names) > 0:
-            reload_model()
+        reload_model()
 
         shared.lora_names = []
         return
     else:
         if len(lora_names) > 1:
             logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+        if not shared.args.no_inject_fused_attention:
+            logger.warning('Fused Attention + AutoGPTQ may break LoRA loading. Disable it.')
 
         peft_config = GPTQLoraConfig(
             inference_mode=True,