Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2024-11-29 02:49:30 +01:00
Lora fixes for AutoGPTQ (#2818)

parent 70b088843d
commit 74ea7522a0
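Summary of the change: the LoRA dispatch in modules/LoRA.py now also honors the explicitly selected loader (shared.args.loader), so LoRAs reach the AutoGPTQ or ExLlama code paths even when the model's class name alone does not identify the backend. In the AutoGPTQ path, clearing LoRAs (an empty list) now always reloads the base model, and a warning is logged when fused attention is enabled, since it may break LoRA loading.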
modules/LoRA.py
@@ -9,9 +9,9 @@ from modules.models import reload_model
 
 
 def add_lora_to_model(lora_names):
-    if 'GPTQForCausalLM' in shared.model.__class__.__name__:
+    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
         add_lora_autogptq(lora_names)
-    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF']:
+    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
         add_lora_exllama(lora_names)
     else:
         add_lora_transformers(lora_names)
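Note on the first hunk: the effect of the added loader checks, as a minimal runnable sketch. The SimpleNamespace stand-ins for shared and the model are invented for illustration and are not the webui's actual objects.

from types import SimpleNamespace

# Hypothetical stand-ins for the webui's shared state, for illustration only.
shared = SimpleNamespace(
    model=SimpleNamespace(),                  # class name is 'SimpleNamespace', not 'GPTQForCausalLM'
    args=SimpleNamespace(loader='AutoGPTQ'),  # but the user explicitly selected the AutoGPTQ loader
)

def pick_lora_path():
    # Mirrors the dispatch above. Before this commit only the class-name
    # checks existed, so this configuration fell through to 'transformers'.
    if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
        return 'autogptq'
    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
        return 'exllama'
    else:
        return 'transformers'

print(pick_lora_path())  # -> 'autogptq', even though the class name does not match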
@@ -67,7 +67,6 @@ def add_lora_autogptq(lora_names):
         return
 
     if len(lora_names) == 0:
-        if len(shared.lora_names) > 0:
-            reload_model()
+        reload_model()
 
         shared.lora_names = []
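Note on the second hunk: previously the base model was reloaded only when a LoRA had already been applied (len(shared.lora_names) > 0); after this change, passing an empty list always calls reload_model() before clearing shared.lora_names, so the AutoGPTQ path always starts from a freshly loaded model.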
@@ -75,6 +74,8 @@ def add_lora_autogptq(lora_names):
     else:
         if len(lora_names) > 1:
             logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+        if not shared.args.no_inject_fused_attention:
+            logger.warning('Fused Attention + AutoGPTQ may break Lora loading. Disable it.')
 
         peft_config = GPTQLoraConfig(
             inference_mode=True,
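Note on the third hunk: the added check warns users who have fused attention enabled (i.e. shared.args.no_inject_fused_attention is not set) that it may break LoRA loading, complementing the existing warning that AutoGPTQ applies only the first LoRA in the list. The GPTQLoraConfig call continues beyond the hunk; its remaining arguments are not shown in this diff.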