From 64e3b44e0f5d7a008d031c3d328804b1617c8c0e Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin" <4000772+mcmonkey4eva@users.noreply.github.com>
Date: Fri, 14 Apr 2023 10:52:06 -0700
Subject: [PATCH] initial multi-lora support (#1103)

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
---
 modules/LoRA.py   | 36 ++++++++++++++++++++++++++----------
 modules/shared.py |  4 ++--
 requirements.txt  |  2 +-
 server.py         | 25 ++++++++++++++-----------
 4 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/modules/LoRA.py b/modules/LoRA.py
index 0cf379e5..8b54ef69 100644
--- a/modules/LoRA.py
+++ b/modules/LoRA.py
@@ -4,19 +4,31 @@ import torch
 from peft import PeftModel
 
 import modules.shared as shared
-from modules.models import reload_model
 
 
-def add_lora_to_model(lora_name):
+def add_lora_to_model(lora_names):
+    prior_set = set(shared.lora_names)
+    added_set = set(lora_names) - prior_set
+    removed_set = prior_set - set(lora_names)
+    shared.lora_names = list(lora_names)
 
-    # If a LoRA had been previously loaded, or if we want
-    # to unload a LoRA, reload the model
-    if shared.lora_name not in ['None', ''] or lora_name in ['None', '']:
-        reload_model()
-    shared.lora_name = lora_name
+    # Nothing to do = skip.
+    if len(added_set) == 0 and len(removed_set) == 0:
+        return
 
-    if lora_name not in ['None', '']:
-        print(f"Adding the LoRA {lora_name} to the model...")
+    # Only adding, and already peft? Do it the easy way.
+    if len(removed_set) == 0 and len(prior_set) > 0:
+        print(f"Adding the LoRA(s) named {added_set} to the model...")
+        for lora in added_set:
+            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
+        return
+
+    # If removing anything, disable all and re-add.
+    if len(removed_set) > 0:
+        shared.model.disable_adapter()
+
+    if len(lora_names) > 0:
+        print("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
         params = {}
         if not shared.args.cpu:
             params['dtype'] = shared.model.dtype
@@ -25,7 +37,11 @@ def add_lora_to_model(lora_name):
             elif shared.args.load_in_8bit:
                 params['device_map'] = {'': 0}
 
-        shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_name}"), **params)
+        shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), **params)
+
+        for lora in lora_names[1:]:
+            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)
+
         if not shared.args.load_in_8bit and not shared.args.cpu:
             shared.model.half()
             if not hasattr(shared.model, "hf_device_map"):
diff --git a/modules/shared.py b/modules/shared.py
index 97f34342..1822d0f7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -6,7 +6,7 @@ import yaml
 model = None
 tokenizer = None
 model_name = "None"
-lora_name = "None"
+lora_names = []
 soft_prompt_tensor = None
 soft_prompt = False
 is_RWKV = False
@@ -68,7 +68,7 @@ settings = {
     },
     'lora_prompts': {
         'default': 'QA',
-        '.*(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Alpaca",
+        '.*alpaca': "Alpaca",
     }
 }
 
diff --git a/requirements.txt b/requirements.txt
index bf3dbd33..9c68600d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,13 +5,13 @@ gradio==3.24.1
 markdown
 numpy
 Pillow>=9.5.0
-peft==0.2.0
 requests
 rwkv==0.7.3
 safetensors==0.3.0
 sentencepiece
 pyyaml
 tqdm
+git+https://github.com/huggingface/peft
 transformers==4.28.0
 bitsandbytes==0.38.1; platform_system != "Windows"
 llama-cpp-python==0.1.33; platform_system != "Windows"
diff --git a/server.py b/server.py
index 134e5001..39406339 100644
--- a/server.py
+++ b/server.py
@@ -88,9 +88,10 @@ def load_model_wrapper(selected_model):
         yield traceback.format_exc()
 
 
-def load_lora_wrapper(selected_lora):
-    add_lora_to_model(selected_lora)
-    return selected_lora
+def load_lora_wrapper(selected_loras):
+    yield ("Applying the following LoRAs to {}:\n\n{}".format(shared.model_name, '\n'.join(selected_loras)))
+    add_lora_to_model(selected_loras)
+    yield ("Successfuly applied the LoRAs")
 
 
 def load_preset_values(preset_menu, state, return_dict=False):
@@ -275,12 +276,14 @@ def create_model_menus():
 
         with gr.Column():
             with gr.Row():
-                shared.gradio['lora_menu'] = gr.Dropdown(choices=get_available_loras(), value=shared.lora_name, label='LoRA')
-                ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': get_available_loras()}, 'refresh-button')
+                shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=get_available_loras(), value=shared.lora_names, label='LoRA(s)')
+                ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': get_available_loras(), 'value': shared.lora_names}, 'refresh-button')
 
             with gr.Column():
-                unload = gr.Button("Unload the model")
-                reload = gr.Button("Reload the model")
+                shared.gradio['lora_menu_apply'] = gr.Button(value='Apply the selected LoRAs')
+                with gr.Row():
+                    unload = gr.Button("Unload the model")
+                    reload = gr.Button("Reload the model")
 
     with gr.Row():
         with gr.Column():
@@ -338,7 +341,7 @@ def create_model_menus():
         update_model_parameters, shared.gradio['interface_state'], None).then(
         load_model_wrapper, shared.gradio['model_menu'], shared.gradio['model_status'], show_progress=True)
 
-    shared.gradio['lora_menu'].change(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['lora_menu'], show_progress=True)
+    shared.gradio['lora_menu_apply'].click(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['model_status'], show_progress=False)
     shared.gradio['download_button'].click(download_model_wrapper, shared.gradio['custom_model_menu'], shared.gradio['model_status'], show_progress=False)
 
@@ -428,8 +431,8 @@ def create_interface():
     # Defining some variables
     gen_events = []
    default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')]
-    if shared.lora_name != "None":
-        default_text = load_prompt(shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')])
+    if len(shared.lora_names) == 1:
+        default_text = load_prompt(shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_names[0].lower())), 'default')])
     else:
         default_text = load_prompt(shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')])
     title = 'Text generation web UI'
@@ -861,7 +864,7 @@ if __name__ == "__main__":
     # Load the model
     shared.model, shared.tokenizer = load_model(shared.model_name)
     if shared.args.lora:
-        add_lora_to_model(shared.args.lora)
+        add_lora_to_model([shared.args.lora])
 
     # Launch the web UI
     create_interface()
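
Usage sketch (a minimal, hypothetical example, not part of the patch itself): after this change, add_lora_to_model() takes a list of LoRA directory names instead of a single string, so callers that previously passed one name must wrap it in a list, as server.py now does with [shared.args.lora]. The LoRA names below ("alpaca-lora-7b", "gpt4all-lora") are placeholders for directories assumed to exist under the configured lora_dir.

    import modules.shared as shared
    from modules.LoRA import add_lora_to_model
    from modules.models import load_model

    # add_lora_to_model() operates on the already loaded shared.model.
    shared.model, shared.tokenizer = load_model(shared.model_name)

    # New API: pass a list. Per the patch, the first adapter is attached with
    # PeftModel.from_pretrained() and any further ones with load_adapter().
    add_lora_to_model(['alpaca-lora-7b', 'gpt4all-lora'])

    # Passing an empty list removes the previously applied adapters
    # (the patch calls disable_adapter() whenever anything is being removed).
    add_lora_to_model([])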