mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-26 01:30:20 +01:00)
initial multi-lora support (#1103)
Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
commit 64e3b44e0f (parent ebb81eb176)
modules/LoRA.py
@@ -4,19 +4,31 @@ import torch
 from peft import PeftModel
 
 import modules.shared as shared
-from modules.models import reload_model
 
 
-def add_lora_to_model(lora_name):
+def add_lora_to_model(lora_names):
+    prior_set = set(shared.lora_names)
+    added_set = set(lora_names) - prior_set
+    removed_set = prior_set - set(lora_names)
+    shared.lora_names = list(lora_names)
 
-    # If a LoRA had been previously loaded, or if we want
-    # to unload a LoRA, reload the model
-    if shared.lora_name not in ['None', ''] or lora_name in ['None', '']:
-        reload_model()
-    shared.lora_name = lora_name
+    # Nothing to do = skip.
+    if len(added_set) == 0 and len(removed_set) == 0:
+        return
 
-    if lora_name not in ['None', '']:
-        print(f"Adding the LoRA {lora_name} to the model...")
+    # Only adding, and already peft? Do it the easy way.
+    if len(removed_set) == 0 and len(prior_set) > 0:
+        print(f"Adding the LoRA(s) named {added_set} to the model...")
+        for lora in added_set:
+            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
+        return
+
+    # If removing anything, disable all and re-add.
+    if len(removed_set) > 0:
+        shared.model.disable_adapter()
+
+    if len(lora_names) > 0:
+        print("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
         params = {}
         if not shared.args.cpu:
             params['dtype'] = shared.model.dtype
@@ -25,7 +37,11 @@ def add_lora_to_model(lora_name):
             elif shared.args.load_in_8bit:
                 params['device_map'] = {'': 0}
 
-        shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_name}"), **params)
+        shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), **params)
+
+        for lora in lora_names[1:]:
+            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
+
         if not shared.args.load_in_8bit and not shared.args.cpu:
             shared.model.half()
             if not hasattr(shared.model, "hf_device_map"):
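The rewritten add_lora_to_model diffs the requested names against shared.lora_names and relies on PEFT being able to hold several adapters on one wrapped model: the first LoRA is attached with PeftModel.from_pretrained and any further ones with load_adapter. A rough standalone sketch of that PEFT pattern; the base model name, adapter directory, and adapter names below are placeholders, not taken from this repo:

# Minimal sketch of the multi-adapter loading pattern; model name and
# adapter paths are placeholders for illustration only.
from pathlib import Path

from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")  # placeholder base model

lora_dir = Path("loras")                # placeholder directory of saved adapters
lora_names = ["alpaca", "vicuna"]       # placeholder adapter names

# The first adapter wraps the base model in a PeftModel...
model = PeftModel.from_pretrained(base, lora_dir / lora_names[0])

# ...and each additional adapter is attached to the same wrapper under its own name.
for name in lora_names[1:]:
    model.load_adapter(lora_dir / name, adapter_name=name)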
modules/shared.py
@@ -6,7 +6,7 @@ import yaml
 model = None
 tokenizer = None
 model_name = "None"
-lora_name = "None"
+lora_names = []
 soft_prompt_tensor = None
 soft_prompt = False
 is_RWKV = False
@@ -68,7 +68,7 @@ settings = {
     },
     'lora_prompts': {
         'default': 'QA',
-        '.*(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Alpaca",
+        '.*alpaca': "Alpaca",
     }
 }
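The 'lora_prompts' entry is a table of regular expressions: when exactly one LoRA is active, server.py matches its name against these keys to pick the default prompt name it then passes to load_prompt (see the create_interface hunk further down), so the broader '.*alpaca' pattern now covers any alpaca-style LoRA name rather than the three sizes listed before. A small sketch of that lookup, mirroring the updated settings entry:

# Sketch of the regex-keyed prompt lookup; the dict mirrors the
# 'lora_prompts' entry changed above.
import re

lora_prompts = {
    'default': 'QA',
    '.*alpaca': "Alpaca",
}

def pick_prompt_name(lora_name):
    # First key whose regex matches the LoRA name wins, otherwise 'default'.
    key = next((k for k in lora_prompts if re.match(k.lower(), lora_name.lower())), 'default')
    return lora_prompts[key]

print(pick_prompt_name("alpaca-lora-7b"))   # -> Alpaca
print(pick_prompt_name("some-other-lora"))  # -> QA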
requirements.txt
@@ -5,13 +5,13 @@ gradio==3.24.1
 markdown
 numpy
 Pillow>=9.5.0
-peft==0.2.0
 requests
 rwkv==0.7.3
 safetensors==0.3.0
 sentencepiece
 pyyaml
 tqdm
+git+https://github.com/huggingface/peft
 transformers==4.28.0
 bitsandbytes==0.38.1; platform_system != "Windows"
 llama-cpp-python==0.1.33; platform_system != "Windows"
server.py
@@ -88,9 +88,10 @@ def load_model_wrapper(selected_model):
         yield traceback.format_exc()
 
 
-def load_lora_wrapper(selected_lora):
-    add_lora_to_model(selected_lora)
-    return selected_lora
+def load_lora_wrapper(selected_loras):
+    yield ("Applying the following LoRAs to {}:\n\n{}".format(shared.model_name, '\n'.join(selected_loras)))
+    add_lora_to_model(selected_loras)
+    yield ("Successfuly applied the LoRAs")
 
 
 def load_preset_values(preset_menu, state, return_dict=False):
@@ -275,12 +276,14 @@ def create_model_menus():
 
         with gr.Column():
             with gr.Row():
-                shared.gradio['lora_menu'] = gr.Dropdown(choices=get_available_loras(), value=shared.lora_name, label='LoRA')
-                ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': get_available_loras()}, 'refresh-button')
+                shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=get_available_loras(), value=shared.lora_names, label='LoRA(s)')
+                ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': get_available_loras(), 'value': shared.lora_names}, 'refresh-button')
 
         with gr.Column():
-            unload = gr.Button("Unload the model")
-            reload = gr.Button("Reload the model")
+            shared.gradio['lora_menu_apply'] = gr.Button(value='Apply the selected LoRAs')
+            with gr.Row():
+                unload = gr.Button("Unload the model")
+                reload = gr.Button("Reload the model")
 
     with gr.Row():
         with gr.Column():
@@ -338,7 +341,7 @@ def create_model_menus():
         update_model_parameters, shared.gradio['interface_state'], None).then(
         load_model_wrapper, shared.gradio['model_menu'], shared.gradio['model_status'], show_progress=True)
 
-    shared.gradio['lora_menu'].change(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['lora_menu'], show_progress=True)
+    shared.gradio['lora_menu_apply'].click(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['model_status'], show_progress=False)
     shared.gradio['download_button'].click(download_model_wrapper, shared.gradio['custom_model_menu'], shared.gradio['model_status'], show_progress=False)
 
@@ -428,8 +431,8 @@ def create_interface():
     # Defining some variables
     gen_events = []
     default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')]
-    if shared.lora_name != "None":
-        default_text = load_prompt(shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')])
+    if len(shared.lora_names) == 1:
+        default_text = load_prompt(shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_names[0].lower())), 'default')])
     else:
         default_text = load_prompt(shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')])
     title = 'Text generation web UI'
@ -861,7 +864,7 @@ if __name__ == "__main__":
|
|||||||
# Load the model
|
# Load the model
|
||||||
shared.model, shared.tokenizer = load_model(shared.model_name)
|
shared.model, shared.tokenizer = load_model(shared.model_name)
|
||||||
if shared.args.lora:
|
if shared.args.lora:
|
||||||
add_lora_to_model(shared.args.lora)
|
add_lora_to_model([shared.args.lora])
|
||||||
|
|
||||||
# Launch the web UI
|
# Launch the web UI
|
||||||
create_interface()
|
create_interface()
|
||||||
|
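On the UI side, the server.py hunks above swap the old auto-applying lora_menu.change handler for a multiselect dropdown, an "Apply the selected LoRAs" button, and a generator callback that streams status text into the model status box. A rough standalone gradio sketch of that wiring, assuming gradio 3.x-style Blocks; the choices and the body of apply_loras are placeholders, not webui code:

# Standalone sketch of the dropdown/button/generator wiring; choices and the
# callback body are placeholders for illustration only.
import gradio as gr

available_loras = ["alpaca", "vicuna"]  # placeholder choices

def apply_loras(selected_loras):
    # Generator handlers let gradio show interim status before the work finishes.
    yield "Applying the following LoRAs:\n\n" + "\n".join(selected_loras)
    # ... the real code would call add_lora_to_model(selected_loras) here ...
    yield "Successfully applied the LoRAs"

with gr.Blocks() as demo:
    lora_menu = gr.Dropdown(multiselect=True, choices=available_loras, value=[], label='LoRA(s)')
    apply_button = gr.Button(value='Apply the selected LoRAs')
    status = gr.Markdown()
    apply_button.click(apply_loras, lora_menu, status, show_progress=False)

demo.queue().launch()

In gradio 3.x, generator handlers need the request queue, hence the queue() call before launch().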