From 1917b1527503d7efbce3d33aa7df9a216aaf36fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=A6=CF=86?= <42910943+Brawlence@users.noreply.github.com> Date: Tue, 21 Mar 2023 13:15:42 +0300 Subject: [PATCH 01/80] Unload and reload models on request --- server.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/server.py b/server.py index cdf7aa93..1309c17d 100644 --- a/server.py +++ b/server.py @@ -63,6 +63,18 @@ def load_model_wrapper(selected_model): return selected_model +def reload_model(): + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + shared.model, shared.tokenizer = load_model(shared.model_name) + +def unload_model(): + shared.model = shared.tokenizer = None + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + def load_lora_wrapper(selected_lora): shared.lora_name = selected_lora default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] @@ -126,6 +138,9 @@ def create_model_and_preset_menus(): with gr.Row(): shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') + with gr.Row(): + shared.gradio['unload_model'] = gr.Button(value='Unload model to free VRAM', elem_id="unload_model") + shared.gradio['reload_model'] = gr.Button(value='Reload the model into VRAM', elem_id="reload_model") def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) @@ -180,6 +195,8 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) + shared.gradio['unload_model'].click(fn=unload_model,inputs=[],outputs=[]) + shared.gradio['reload_model'].click(fn=reload_model,inputs=[],outputs=[]) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) From 483d173d23309f77d197951ad9f21632955fd13a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=A6=CF=86?= <42910943+Brawlence@users.noreply.github.com> Date: Tue, 21 Mar 2023 20:19:38 +0300 Subject: [PATCH 02/80] Code reuse + indication Now shows the message in the console when unloading weights. Also reload_model() calls unload_model() first to free the memory so that multiple reloads won't overfill it. 
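For reference, the pattern these two patches converge on is: drop the Python references to the model and tokenizer, run the garbage collector, and empty the CUDA allocator cache so the VRAM is actually returned; reloading is then just unload followed by load_model(). Below is a minimal self-contained sketch of that flow. The Shared container, its Args, and the dummy load_model() are illustrative stand-ins, not the webui's actual modules.shared and modules.models.load_model.

import gc

import torch


class Shared:
    """Stand-in for modules.shared: holds the model, tokenizer and CLI args."""
    class Args:
        cpu = False  # set True to skip the CUDA cache flush

    args = Args()
    model = None
    tokenizer = None
    model_name = 'example-13b'


shared = Shared()


def load_model(model_name):
    """Dummy loader standing in for modules.models.load_model()."""
    return object(), object()  # (model, tokenizer) placeholders


def unload_model():
    # Drop the references first so the weights become garbage-collectable,
    # then flush the CUDA cache so the memory is released back to the GPU.
    shared.model = shared.tokenizer = None
    if not shared.args.cpu:
        gc.collect()
        torch.cuda.empty_cache()
    print("Model weights unloaded.")


def reload_model():
    # Unloading first keeps repeated reloads from stacking copies in VRAM.
    unload_model()
    shared.model, shared.tokenizer = load_model(shared.model_name)


if __name__ == '__main__':
    reload_model()
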
--- server.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index 1309c17d..4c3497c9 100644 --- a/server.py +++ b/server.py @@ -64,9 +64,7 @@ def load_model_wrapper(selected_model): return selected_model def reload_model(): - if not shared.args.cpu: - gc.collect() - torch.cuda.empty_cache() + unload_model() shared.model, shared.tokenizer = load_model(shared.model_name) def unload_model(): @@ -74,6 +72,7 @@ def unload_model(): if not shared.args.cpu: gc.collect() torch.cuda.empty_cache() + print("Model weights unloaded.") def load_lora_wrapper(selected_lora): shared.lora_name = selected_lora From b37c54edcfee36ef5fdbaae9f6337d236be52b99 Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:30:18 -0700 Subject: [PATCH 03/80] lora-dir, model-dir and login auth Added lora-dir, model-dir, and a login auth arguments that points to a file contains usernames and passwords in the format of "u:pw,u:pw,..." --- modules/LoRA.py | 2 +- modules/models.py | 20 ++++++++++---------- modules/shared.py | 3 +++ server.py | 14 +++++++++----- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index aa68ad32..394f7367 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -30,7 +30,7 @@ def add_lora_to_model(lora_name): elif shared.args.load_in_8bit: params['device_map'] = {'': 0} - shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}"), **params) + shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_name}"), **params) if not shared.args.load_in_8bit and not shared.args.cpu: shared.model.half() if not hasattr(shared.model, "hf_device_map"): diff --git a/modules/models.py b/modules/models.py index ccb97da3..757eb8b9 100644 --- a/modules/models.py +++ b/modules/models.py @@ -46,9 +46,9 @@ def load_model(model_name): # Default settings if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) + model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), device_map='auto', load_in_8bit=True) else: - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) + model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) if torch.has_mps: device = torch.device('mps') model = model.to(device) @@ -76,11 +76,11 @@ def load_model(model_name): num_bits=4, group_size=64, group_dim=2, symmetric=False)) - model = OptLM(f"facebook/{shared.model_name}", env, "models", policy) + model = OptLM(f"facebook/{shared.model_name}", env, shared.model_name, policy) # DeepSpeed ZeRO-3 elif shared.args.deepspeed: - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) + model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), torch_dtype=torch.bfloat16 if 
shared.args.bf16 else torch.float16) model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0] model.module.eval() # Inference print(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}") @@ -89,8 +89,8 @@ def load_model(model_name): elif shared.is_RWKV: from modules.RWKV import RWKVModel, RWKVTokenizer - model = RWKVModel.from_pretrained(Path(f'models/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") - tokenizer = RWKVTokenizer.from_pretrained(Path('models')) + model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") + tokenizer = RWKVTokenizer.from_pretrained(Path(shared.model_name)) return model, tokenizer @@ -142,7 +142,7 @@ def load_model(model_name): if shared.args.disk: params["offload_folder"] = shared.args.disk_cache_dir - checkpoint = Path(f'models/{shared.model_name}') + checkpoint = Path(f'{shared.args.model_dir}/{shared.model_name}') if shared.args.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto': config = AutoConfig.from_pretrained(checkpoint) @@ -159,10 +159,10 @@ def load_model(model_name): model = AutoModelForCausalLM.from_pretrained(checkpoint, **params) # Loading the tokenizer - if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path("models/gpt-j-6B/").exists(): - tokenizer = AutoTokenizer.from_pretrained(Path("models/gpt-j-6B/")) + if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): + tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/")) else: - tokenizer = AutoTokenizer.from_pretrained(Path(f"models/{shared.model_name}/")) + tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}/")) tokenizer.truncation_side = 'left' print(f"Loaded the model in {(time.time()-t0):.2f} seconds.") diff --git a/modules/shared.py b/modules/shared.py index 720c697e..72cea1d4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -106,6 +106,9 @@ parser.add_argument('--listen-port', type=int, help='The listening port that the parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') +parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. 
"/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") +parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") args = parser.parse_args() # Provisional, this will be deleted later diff --git a/server.py b/server.py index f423e368..f8fd663c 100644 --- a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path('models/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) @@ -48,7 +48,7 @@ def get_available_softprompts(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) def get_available_loras(): - return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return ['None'] + sorted([item.name for item in list(Path('shared.args.lora_dir').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def load_model_wrapper(selected_model): if selected_model != shared.model_name: @@ -448,11 +448,15 @@ def create_interface(): extensions_module.create_extensions_block() # Launch the interface + gradio_auth_creds = [] + with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: + for line in file.readlines(): + gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] shared.gradio['interface'].queue() if shared.args.listen: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) else: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) create_interface() From ec2a1faceecddf1400245a6c8983e40ef430cccf Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:34:33 -0700 Subject: [PATCH 04/80] Update server.py --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index f8fd663c..c69abb4b 100644 --- 
a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From 9e2963e0c86180fec5a88db4ec77530ad2de7d69 Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:35:45 -0700 Subject: [PATCH 05/80] Update server.py --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index c69abb4b..67c1e915 100644 --- a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From d51cb8292b42eb29e4e45ed850d23b446208a0d3 Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:36:31 -0700 Subject: [PATCH 06/80] Update server.py yea i should go to bed --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index 67c1e915..8ac6031a 100644 --- a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From 1a1e420e65e5e9aed26419ccccc59765505b38c6 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?=CE=A6=CF=86?= <42910943+Brawlence@users.noreply.github.com> Date: Sat, 25 Mar 2023 21:31:13 +0300 Subject: [PATCH 07/80] Silero_tts streaming fix Temporarily suppress the streaming during the audio response as it would interfere with the audio (making it stutter and play anew) --- extensions/silero_tts/script.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index a81a5da1..1352993a 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -26,6 +26,7 @@ current_params = params.copy() voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast'] +streaming_state = shared.args.no_stream # remember if chat streaming was enabled # Used for making text xml compatible, needed for voice pitch and speed control table = str.maketrans({ @@ -77,6 +78,7 @@ def input_modifier(string): shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')] shared.processing_message = "*Is recording a voice message...*" + shared.args.no_stream = True # Disable streaming cause otherwise the audio output will stutter and begin anew every time the message is being updated return string def output_modifier(string): @@ -84,7 +86,7 @@ def output_modifier(string): This function is applied to the model outputs. 
""" - global model, current_params + global model, current_params, streaming_state for i in params: if params[i] != current_params[i]: @@ -116,6 +118,7 @@ def output_modifier(string): string += f'\n\n{original_string}' shared.processing_message = "*Is typing...*" + shared.args.no_stream = streaming_state # restore the streaming option to the previous value return string def bot_prefix_modifier(string): From 566898a79a0915879273f3d77017908bcf7d62ab Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:08:26 -0700 Subject: [PATCH 08/80] initial lora training tab --- modules/training.py | 139 ++++++++++++++++++ requirements.txt | 2 + server.py | 7 +- .../datasets/put-trainer-datasets-here.txt | 0 training/formats/alpaca-chatbot-format.json | 4 + training/formats/alpaca-format.json | 4 + training/formats/put-trainer-formats-here.txt | 0 7 files changed, 153 insertions(+), 3 deletions(-) create mode 100644 modules/training.py create mode 100644 training/datasets/put-trainer-datasets-here.txt create mode 100644 training/formats/alpaca-chatbot-format.json create mode 100644 training/formats/alpaca-format.json create mode 100644 training/formats/put-trainer-formats-here.txt diff --git a/modules/training.py b/modules/training.py new file mode 100644 index 00000000..96cd6e7c --- /dev/null +++ b/modules/training.py @@ -0,0 +1,139 @@ +import sys, torch, json +from pathlib import Path +import gradio as gr +from datasets import load_dataset +import transformers +from modules import ui, shared +from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict + +def get_json_dataset(path: str): + def get_set(): + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) + return get_set + +def create_train_interface(): + with gr.Tab('Train LoRA', elem_id='lora-train-tab'): + loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") + # TODO: Add explanations of batch sizes and recommendations. Note that batch/microBatch determines gradient accumulation and explain what that means. Note the effects on VRAM usage from changing these values. + microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='(TODO)') + batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='(TODO)') + epochs = gr.Slider(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. + loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. 
This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + # TODO: Better explain what this does. + loraDropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') + cutoffLen = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + with gr.Row(): + datasetFunction = get_json_dataset('training/datasets') + dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') + with gr.Row(): + evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset') + ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') + with gr.Row(): + formatsFunction = get_json_dataset('training/formats') + format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format') + ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') + startButton = gr.Button("Start LoRA Training") + output = gr.Markdown(value="(...)") + startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) + +def cleanPath(basePath: str, path: str): + """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" + # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. 
+ # Or swap it to a strict whitelist of [a-zA-Z_0-9] + path = path.replace('\\', '/').replace('..', '_') + if basePath is None: + return path + return f'{Path(basePath).absolute()}/{path}' + +def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): + # Input validation / processing + # TODO: --lora-dir PR once pulled will need to be applied here + loraName = f"loras/{cleanPath(None, loraName)}" + if dataset is None: + return "**Missing dataset choice input, cannot continue.**" + if format is None: + return "**Missing format choice input, cannot continue.**" + gradientAccumulationSteps = batchSize // microBatchSize + actualLR = float(learningRate) + model = shared.model + tokenizer = shared.tokenizer + tokenizer.pad_token = 0 + tokenizer.padding_side = "left" + # Prep the dataset, format, etc + with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: + formatData: dict[str, str] = json.load(formatFile) + def tokenize(prompt): + result = tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") + return { + "input_ids": result["input_ids"][:-1], + "attention_mask": result["attention_mask"][:-1], + } + def generate_prompt(data_point: dict[str, str]): + for options, data in formatData.items(): + if set(options.split(',')) == set(data_point.keys()): + for key, val in data_point.items(): + data = data.replace(f'%{key}%', val) + return data + raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(formatData.keys())}"') + def generate_and_tokenize_prompt(data_point): + prompt = generate_prompt(data_point) + return tokenize(prompt) + data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) + train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) + if evalDataset == 'None': + evalData = None + else: + evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) + evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) + # Start prepping the model itself + model = prepare_model_for_int8_training(model) + config = LoraConfig( + r=loraRank, + lora_alpha=loraAlpha, + # TODO: Should target_modules be configurable? + target_modules=[ "q_proj", "v_proj" ], + lora_dropout=loraDropout, + bias="none", + task_type="CAUSAL_LM" + ) + model = get_peft_model(model, config) + trainer = transformers.Trainer( + model=model, + train_dataset=train_data, + eval_dataset=evalData, + args=transformers.TrainingArguments( + per_device_train_batch_size=microBatchSize, + gradient_accumulation_steps=gradientAccumulationSteps, + # TODO: Should more of these be configurable? Probably. 
+ warmup_steps=100, + num_train_epochs=epochs, + learning_rate=actualLR, + fp16=True, + logging_steps=20, + evaluation_strategy="steps" if evalData is not None else "no", + save_strategy="steps", + eval_steps=200 if evalData is not None else None, + save_steps=200, + output_dir=loraName, + save_total_limit=3, + load_best_model_at_end=True if evalData is not None else False, + # TODO: Enable multi-device support + ddp_find_unused_parameters=None, + ), + data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False), + ) + model.config.use_cache = False + old_state_dict = model.state_dict + model.state_dict = ( + lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict()) + ).__get__(model, type(model)) + if torch.__version__ >= "2" and sys.platform != "win32": + model = torch.compile(model) + # Actually start and run and save at the end + trainer.train() + model.save_pretrained(loraName) + return "Done!" diff --git a/requirements.txt b/requirements.txt index e5b3de69..c93ce671 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,6 @@ rwkv==0.7.0 safetensors==0.3.0 sentencepiece tqdm +peft +datasets git+https://github.com/huggingface/transformers diff --git a/server.py b/server.py index f423e368..cd95d5ef 100644 --- a/server.py +++ b/server.py @@ -8,10 +8,8 @@ from pathlib import Path import gradio as gr -import modules.chat as chat +from modules import chat, shared, ui, training import modules.extensions as extensions_module -import modules.shared as shared -import modules.ui as ui from modules.html_generator import generate_chat_html from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt @@ -443,6 +441,9 @@ def create_interface(): shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500)}') + + with gr.Tab("Training", elem_id="training-tab"): + training.create_train_interface() if shared.args.extensions is not None: extensions_module.create_extensions_block() diff --git a/training/datasets/put-trainer-datasets-here.txt b/training/datasets/put-trainer-datasets-here.txt new file mode 100644 index 00000000..e69de29b diff --git a/training/formats/alpaca-chatbot-format.json b/training/formats/alpaca-chatbot-format.json new file mode 100644 index 00000000..4b38103f --- /dev/null +++ b/training/formats/alpaca-chatbot-format.json @@ -0,0 +1,4 @@ +{ + "instruction,output": "User: %instruction%\nAssistant: %output%", + "instruction,input,output": "User: %instruction%: %input%\nAssistant: %output%" +} diff --git a/training/formats/alpaca-format.json b/training/formats/alpaca-format.json new file mode 100644 index 00000000..dd6df956 --- /dev/null +++ b/training/formats/alpaca-format.json @@ -0,0 +1,4 @@ +{ + "instruction,output": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n%instruction%\n\n### Response:\n%output%", + "instruction,input,output": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n%instruction%\n\n### Input:\n%input%\n\n### Response:\n%output%" +} diff --git a/training/formats/put-trainer-formats-here.txt b/training/formats/put-trainer-formats-here.txt new file mode 100644 index 00000000..e69de29b From 7bf601107c1b9aebd5bbbb5d08aa3d20c697daf1 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:28:46 -0700 Subject: [PATCH 09/80] automatically strip empty data entries (for better alpaca dataset compat) --- modules/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 96cd6e7c..e2be18e8 100644 --- a/modules/training.py +++ b/modules/training.py @@ -74,7 +74,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le } def generate_prompt(data_point: dict[str, str]): for options, data in formatData.items(): - if set(options.split(',')) == set(data_point.keys()): + if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): for key, val in data_point.items(): data = data.replace(f'%{key}%', val) return data From 5c49a0dcd02c3cf2e31a00fdaf554f36895276d7 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:37:32 -0700 Subject: [PATCH 10/80] fix error from prepare call running twice in a row --- modules/training.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index e2be18e8..0e210c52 100644 --- a/modules/training.py +++ b/modules/training.py @@ -90,7 +90,8 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) # Start prepping the model itself - model = prepare_model_for_int8_training(model) + if not hasattr(model, 'lm_head') or hasattr(model.lm_head, 'weight'): + model = prepare_model_for_int8_training(model) config = LoraConfig( r=loraRank, lora_alpha=loraAlpha, From 8da237223ed008c418386a805524929ddebb59ba Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" 
Date: Sat, 25 Mar 2023 12:48:35 -0700 Subject: [PATCH 11/80] document options better --- modules/training.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/training.py b/modules/training.py index 0e210c52..250093a0 100644 --- a/modules/training.py +++ b/modules/training.py @@ -14,27 +14,27 @@ def get_json_dataset(path: str): def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") - # TODO: Add explanations of batch sizes and recommendations. Note that batch/microBatch determines gradient accumulation and explain what that means. Note the effects on VRAM usage from changing these values. - microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='(TODO)') - batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='(TODO)') + # TODO: Implement multi-device support. + microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') epochs = gr.Slider(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') - # TODO: Better explain what this does. + # TODO: Better explain what this does, in terms of real world effect especially. loraDropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') cutoffLen = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') with gr.Row(): datasetFunction = get_json_dataset('training/datasets') - dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset') + dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset', info='The dataset file to use for training.') ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') with gr.Row(): - evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset') + evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') with gr.Row(): formatsFunction = get_json_dataset('training/formats') - format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format') + format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') startButton = gr.Button("Start LoRA Training") output = gr.Markdown(value="(...)") From f1ba2196b1a640bd094623120486b847ca59ccf5 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:57:36 -0700 Subject: [PATCH 12/80] make 'model' variables less ambiguous --- modules/training.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/modules/training.py b/modules/training.py index 250093a0..f9f0790f 100644 --- a/modules/training.py +++ b/modules/training.py @@ -59,15 +59,13 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le return "**Missing format choice input, cannot continue.**" gradientAccumulationSteps = batchSize // microBatchSize actualLR = float(learningRate) - model = shared.model - tokenizer = shared.tokenizer - tokenizer.pad_token = 0 - tokenizer.padding_side = "left" + shared.tokenizer.pad_token = 0 + shared.tokenizer.padding_side = "left" # Prep the dataset, format, etc with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: formatData: dict[str, str] = json.load(formatFile) def tokenize(prompt): - result = tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") + result = shared.tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") return { "input_ids": result["input_ids"][:-1], "attention_mask": result["attention_mask"][:-1], @@ -90,8 +88,8 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) # Start prepping the model itself - if not hasattr(model, 'lm_head') or hasattr(model.lm_head, 'weight'): - model = prepare_model_for_int8_training(model) + if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): + prepare_model_for_int8_training(shared.model) config = LoraConfig( r=loraRank, lora_alpha=loraAlpha, @@ -101,9 +99,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le bias="none", task_type="CAUSAL_LM" ) - model = get_peft_model(model, config) + loraModel = get_peft_model(shared.model, config) trainer = 
transformers.Trainer( - model=model, + model=loraModel, train_dataset=train_data, eval_dataset=evalData, args=transformers.TrainingArguments( @@ -125,16 +123,16 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le # TODO: Enable multi-device support ddp_find_unused_parameters=None, ), - data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False), + data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), ) - model.config.use_cache = False - old_state_dict = model.state_dict - model.state_dict = ( + loraModel.config.use_cache = False + old_state_dict = loraModel.state_dict + loraModel.state_dict = ( lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict()) - ).__get__(model, type(model)) + ).__get__(loraModel, type(loraModel)) if torch.__version__ >= "2" and sys.platform != "win32": - model = torch.compile(model) + loraModel = torch.compile(loraModel) # Actually start and run and save at the end trainer.train() - model.save_pretrained(loraName) + loraModel.save_pretrained(loraName) return "Done!" From 0bac80d9ebf03d91ed5b8f921be03debc3c65cee Mon Sep 17 00:00:00 2001 From: Sean Fitzgerald Date: Sat, 25 Mar 2023 13:08:45 -0700 Subject: [PATCH 13/80] Potential fix for issues/571 --- modules/text_generation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index fd017e2c..eb8f6ca1 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -236,8 +236,6 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi break yield formatted_outputs(reply, shared.model_name) - yield formatted_outputs(reply, shared.model_name) - # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria' else: for i in range(max_new_tokens//8+1): From 9ff6a538b6055b6845efd2f0e625386a847945eb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 26 Mar 2023 22:11:19 -0300 Subject: [PATCH 14/80] Bump gradio version Make sure to upgrade with `pip install -r requirements.txt --upgrade` --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e5b3de69..c84f2948 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ accelerate==0.17.1 bitsandbytes==0.37.1 flexgen==0.1.7 -gradio==3.18.0 +gradio==3.23.0 markdown numpy peft==0.2.0 From 1c77fdca4cdfca5c636595a8aaaff3281b859d3a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 26 Mar 2023 22:20:30 -0300 Subject: [PATCH 15/80] Change notebook mode appearance --- css/chat.css | 6 ++++++ css/main.css | 10 ++++++++++ css/main.js | 2 +- modules/extensions.py | 4 ++-- server.py | 25 ++++++++++++++----------- 5 files changed, 33 insertions(+), 14 deletions(-) diff --git a/css/chat.css b/css/chat.css index 8d9d88a6..1e703530 100644 --- a/css/chat.css +++ b/css/chat.css @@ -23,3 +23,9 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .pending.svelte-1ed2p3z { opacity: 1; } + +#extensions { + padding: 0; + padding: 0; +} + diff --git a/css/main.css b/css/main.css index 09f3b6a8..97879f01 100644 --- a/css/main.css +++ b/css/main.css @@ -54,3 +54,13 @@ ol li p, ul li p { .gradio-container-3-18-0 .prose * h1, h2, h3, h4 { color: white; } + +.gradio-container { + max-width: 100% !important; + padding-top: 0 !important; +} + +#extensions { + padding: 15px; + padding: 15px; +} diff --git 
a/css/main.js b/css/main.js index 9db3fe8b..029ecb62 100644 --- a/css/main.js +++ b/css/main.js @@ -11,7 +11,7 @@ let extensions = document.getElementById('extensions'); main_parent.addEventListener('click', function(e) { // Check if the main element is visible if (main.offsetHeight > 0 && main.offsetWidth > 0) { - extensions.style.display = 'block'; + extensions.style.display = 'flex'; } else { extensions.style.display = 'none'; } diff --git a/modules/extensions.py b/modules/extensions.py index c55dc978..c3cf4de4 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -63,8 +63,8 @@ def create_extensions_block(): # Creating the extension ui elements if should_display_ui: - with gr.Box(elem_id="extensions"): - gr.Markdown("Extensions") + with gr.Column(elem_id="extensions"): for extension, name in iterator(): + gr.Markdown(f"\n### {name}") if hasattr(extension, "ui"): extension.ui() diff --git a/server.py b/server.py index f1b95a5b..56bb499d 100644 --- a/server.py +++ b/server.py @@ -369,19 +369,22 @@ def create_interface(): elif shared.args.notebook: with gr.Tab("Text generation", elem_id="main"): - with gr.Tab('Raw'): - shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Column(scale=4): + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_id="textbox", lines=25) + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() - create_model_and_preset_menus() + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Column(scale=1): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + + create_model_and_preset_menus() with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) From 95c97e1747f277e62db997da73556a94904c1f9c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 26 Mar 2023 23:47:29 -0300 Subject: [PATCH 16/80] Unload the model using the "Remove all" button --- server.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/server.py b/server.py index 3e31377c..db83b4f3 100644 --- a/server.py +++ b/server.py @@ -50,26 +50,20 @@ def get_available_softprompts(): def get_available_loras(): return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) +def unload_model(): + shared.model = shared.tokenizer = None + clear_torch_cache() + def load_model_wrapper(selected_model): if selected_model != shared.model_name: shared.model_name = selected_model - shared.model = shared.tokenizer = None - clear_torch_cache() - shared.model, shared.tokenizer = load_model(shared.model_name) + + unload_model() + if selected_model != '': + shared.model, shared.tokenizer = 
load_model(shared.model_name) return selected_model -def reload_model(): - unload_model() - shared.model, shared.tokenizer = load_model(shared.model_name) - -def unload_model(): - shared.model = shared.tokenizer = None - if not shared.args.cpu: - gc.collect() - torch.cuda.empty_cache() - print("Model weights unloaded.") - def load_lora_wrapper(selected_lora): add_lora_to_model(selected_lora) default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] @@ -128,9 +122,6 @@ def create_model_and_preset_menus(): with gr.Row(): shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') - with gr.Row(): - shared.gradio['unload_model'] = gr.Button(value='Unload model to free VRAM', elem_id="unload_model") - shared.gradio['reload_model'] = gr.Button(value='Reload the model into VRAM', elem_id="reload_model") def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) @@ -185,8 +176,6 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['unload_model'].click(fn=unload_model,inputs=[],outputs=[]) - shared.gradio['reload_model'].click(fn=reload_model,inputs=[],outputs=[]) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) From 3dc61284d581bdf688731cb68f1dc3fa47ae59d1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 00:04:43 -0300 Subject: [PATCH 17/80] Handle unloading LoRA from dropdown menu icon --- modules/LoRA.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 283fcf4c..f5dfe4ed 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -18,11 +18,11 @@ def add_lora_to_model(lora_name): # If a LoRA had been previously loaded, or if we want # to unload a LoRA, reload the model - if shared.lora_name != "None" or lora_name == "None": + if shared.lora_name not in ['None', ''] or lora_name in ['None', '']: reload_model() shared.lora_name = lora_name - if lora_name != "None": + if lora_name not in ['None', '']: print(f"Adding the LoRA {lora_name} to the model...") params = {} if not shared.args.cpu: From 
57345b8f30fcaf5eef2a5be2b4c239d51750a6ba Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:16:37 -0300 Subject: [PATCH 18/80] Add prompt loading/saving menus + reorganize interface --- prompts/Alpaca.txt | 6 ++++ prompts/Open Assistant.txt | 1 + prompts/QA.txt | 4 +++ server.py | 67 +++++++++++++++++++++++++++++--------- 4 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 prompts/Alpaca.txt create mode 100644 prompts/Open Assistant.txt create mode 100644 prompts/QA.txt diff --git a/prompts/Alpaca.txt b/prompts/Alpaca.txt new file mode 100644 index 00000000..8434a80c --- /dev/null +++ b/prompts/Alpaca.txt @@ -0,0 +1,6 @@ +Below is an instruction that describes a task. Write a response that appropriately completes the request. +### Instruction: +Write a poem about the transformers Python library. +Mention the word "large language models" in that poem. +### Response: + diff --git a/prompts/Open Assistant.txt b/prompts/Open Assistant.txt new file mode 100644 index 00000000..cf1ae4a2 --- /dev/null +++ b/prompts/Open Assistant.txt @@ -0,0 +1 @@ +<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|> diff --git a/prompts/QA.txt b/prompts/QA.txt new file mode 100644 index 00000000..32b0e235 --- /dev/null +++ b/prompts/QA.txt @@ -0,0 +1,4 @@ +Common sense questions and answers + +Question: +Factual answer: diff --git a/server.py b/server.py index db83b4f3..b789ab16 100644 --- a/server.py +++ b/server.py @@ -4,6 +4,7 @@ import re import sys import time import zipfile +from datetime import datetime from pathlib import Path import gradio as gr @@ -38,6 +39,13 @@ def get_available_models(): def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) +def get_available_prompts(): + prompts = [] + prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('[0-9]*.txt'))), key=str.lower, reverse=True) + prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('*.txt'))), key=str.lower) + prompts += ['None'] + return prompts + def get_available_characters(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('characters').glob('*.json'))), key=str.lower) @@ -98,7 +106,7 @@ def load_preset_values(preset_menu, return_dict=False): if return_dict: return generate_params else: - return preset_menu, generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] + return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] def upload_soft_prompt(file): with zipfile.ZipFile(io.BytesIO(file)) as zf: @@ -123,9 +131,43 @@ def create_model_and_preset_menus(): 
shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') +def save_prompt(text): + fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt" + with open(Path(f'prompts/{fname}'), 'w', encoding='utf-8') as f: + f.write(text) + return f"Saved prompt to prompts/{fname}" + +def load_prompt(fname): + if fname in ['None', '']: + return '' + else: + with open(Path(f'prompts/{fname}.txt'), 'r', encoding='utf-8') as f: + return f.read() + +def create_prompt_menus(): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['prompt_menu'] = gr.Dropdown(choices=get_available_prompts(), value='None', label='Prompt') + ui.create_refresh_button(shared.gradio['prompt_menu'], lambda : None, lambda : {'choices': get_available_prompts()}, 'refresh-button') + + with gr.Column(): + with gr.Column(): + shared.gradio['save_prompt'] = gr.Button('Save prompt') + shared.gradio['status'] = gr.Markdown('Ready') + + shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=True) + shared.gradio['save_prompt'].click(save_prompt, [shared.gradio['textbox']], [shared.gradio['status']], show_progress=False) + def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) + with gr.Row(): + with gr.Column(): + create_model_and_preset_menus() + with gr.Column(): + shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)') + with gr.Row(): with gr.Column(): with gr.Box(): @@ -156,12 +198,6 @@ def create_settings_menus(default_preset): shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)') - - with gr.Row(): - shared.gradio['preset_menu_mirror'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') - ui.create_refresh_button(shared.gradio['preset_menu_mirror'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') - with gr.Row(): shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') @@ -176,8 +212,7 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) - shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 
'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) + shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -265,8 +300,8 @@ def create_interface(): shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") with gr.Row(): shared.gradio['Impersonate'] = gr.Button('Impersonate') shared.gradio['Regenerate'] = gr.Button('Regenerate') @@ -279,8 +314,6 @@ def create_interface(): shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) - create_model_and_preset_menus() - with gr.Tab("Character", elem_id="chat-settings"): shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') @@ -384,12 +417,15 @@ def create_interface(): shared.gradio['html'] = gr.HTML() with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop') + with gr.Column(scale=1): + gr.Markdown("\n") shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - create_model_and_preset_menus() + create_prompt_menus() + with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) @@ -413,7 +449,7 @@ def create_interface(): with gr.Column(): shared.gradio['Stop'] = gr.Button('Stop') - create_model_and_preset_menus() + create_prompt_menus() with gr.Column(): with gr.Tab('Raw'): @@ -422,6 +458,7 @@ def create_interface(): shared.gradio['markdown'] = gr.Markdown() with gr.Tab('HTML'): shared.gradio['html'] = gr.HTML() + with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) From 8e2d94a5a1a8252131715d7dfe068fc8e49d9aaf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:21:19 -0300 Subject: [PATCH 19/80] Add saved promtps to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 36852916..a9c47a5a 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ repositories settings.json img_bot* img_me* +prompts/[0-9]* From 
202e981d00755e45850e3f428e24dc9e8be75b0c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:30:57 -0300 Subject: [PATCH 20/80] Make Generate/Stop buttons smaller in notebook mode --- server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index b789ab16..27525e43 100644 --- a/server.py +++ b/server.py @@ -417,8 +417,12 @@ def create_interface(): shared.gradio['html'] = gr.HTML() with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['Stop'] = gr.Button('Stop') + with gr.Column(): + with gr.Row(): + shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop') + with gr.Column(): + pass with gr.Column(scale=1): gr.Markdown("\n") From d911c22af9019312eb05f3981ffee22c7243f1d8 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 08:31:49 -0700 Subject: [PATCH 21/80] use shared rows to make the LoRA Trainer interface a bit more compact / clean --- modules/training.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/training.py b/modules/training.py index f9f0790f..aa085fda 100644 --- a/modules/training.py +++ b/modules/training.py @@ -14,11 +14,13 @@ def get_json_dataset(path: str): def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") - # TODO: Implement multi-device support. - microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') - epochs = gr.Slider(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') - learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + with gr.Row(): + # TODO: Implement multi-device support. + microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + with gr.Row(): + epochs = gr.Number(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. 
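# [Editor's note: illustrative sketch, not part of the patch series.]
# The two batch-size sliders introduced in this trainer UI encode the
# gradient-accumulation arithmetic that do_train() applies later in the series
# (gradientAccum = batch / microBatch). A minimal standalone example using the
# slider defaults shown here:
batch_size = 128       # "Batch Size" slider default (global batch)
micro_batch_size = 4   # "Micro Batch Size" slider default (per-device batch)

gradient_accumulation_steps = batch_size // micro_batch_size
print(gradient_accumulation_steps)  # -> 32
# 32 micro-batches of 4 samples are accumulated before each optimizer update,
# giving an effective global batch of 32 * 4 = 128.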
loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') @@ -29,10 +31,8 @@ def create_train_interface(): datasetFunction = get_json_dataset('training/datasets') dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset', info='The dataset file to use for training.') ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - with gr.Row(): evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - with gr.Row(): formatsFunction = get_json_dataset('training/formats') format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') From 2afe1c13c143dd3e8c2d63c15fb8c1ef59895448 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 08:32:32 -0700 Subject: [PATCH 22/80] move Training to before Interface mode as Interface Mode seems to be a core 'settings' page that naturally belongs at the very end --- server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index 03158ac6..0e512c7b 100644 --- a/server.py +++ b/server.py @@ -468,6 +468,9 @@ def create_interface(): shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + with gr.Tab("Training", elem_id="training-tab"): + training.create_train_interface() + with gr.Tab("Interface mode", elem_id="interface-mode"): modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" @@ -488,9 +491,6 @@ def create_interface(): shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'
Reloading...
\'; setTimeout(function(){location.reload()},2500)}') - with gr.Tab("Training", elem_id="training-tab"): - training.create_train_interface() - if shared.args.extensions is not None: extensions_module.create_extensions_block() From 572bafcd24099553cd432e4a695a20050386f8c9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:43:37 -0300 Subject: [PATCH 23/80] Less verbose message --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 27525e43..020093ee 100644 --- a/server.py +++ b/server.py @@ -135,7 +135,7 @@ def save_prompt(text): fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt" with open(Path(f'prompts/{fname}'), 'w', encoding='utf-8') as f: f.write(text) - return f"Saved prompt to prompts/{fname}" + return f"Saved to prompts/{fname}" def load_prompt(fname): if fname in ['None', '']: From addb9777f9130f6cef773db68992bfac7cff8058 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:59:07 -0300 Subject: [PATCH 24/80] Increase size of GALACTICA equations --- css/main.css | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/css/main.css b/css/main.css index 97879f01..3f044094 100644 --- a/css/main.css +++ b/css/main.css @@ -37,12 +37,6 @@ text-decoration: none !important; } -svg { - display: unset !important; - vertical-align: middle !important; - margin: 5px; -} - ol li p, ul li p { display: inline-block; } @@ -64,3 +58,8 @@ ol li p, ul li p { padding: 15px; padding: 15px; } + +span.math.inline { + font-size: 27px; + vertical-align: baseline !important; +} From af65c129008c7b84a933247867673575de07ae33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 13:23:59 -0300 Subject: [PATCH 25/80] Change Stop button behavior --- modules/callbacks.py | 2 +- modules/chat.py | 4 ---- modules/text_generation.py | 4 ++++ server.py | 9 +++++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/modules/callbacks.py b/modules/callbacks.py index 8d30d615..d85f406d 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -54,7 +54,7 @@ class Iteratorize: self.stop_now = False def _callback(val): - if self.stop_now: + if self.stop_now or shared.stop_everything: raise ValueError self.q.put(val) diff --git a/modules/chat.py b/modules/chat.py index 1a43cf3d..cc3c45c7 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -80,11 +80,7 @@ def extract_message_from_reply(reply, name1, name2, check): reply = fix_newlines(reply) return reply, next_character_found -def stop_everything_event(): - shared.stop_everything = True - def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False): - shared.stop_everything = False just_started = True eos_token = '\n' if check else None name1_original = name1 diff --git a/modules/text_generation.py b/modules/text_generation.py index 9b2c233d..477257c2 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -99,9 +99,13 @@ def set_manual_seed(seed): if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) +def stop_everything_event(): + shared.stop_everything = True + def generate_reply(question, 
max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, eos_token=None, stopping_strings=[]): clear_torch_cache() set_manual_seed(seed) + shared.stop_everything = False t0 = time.time() original_question = question diff --git a/server.py b/server.py index 020093ee..9f90c79b 100644 --- a/server.py +++ b/server.py @@ -16,7 +16,8 @@ import modules.ui as ui from modules.html_generator import generate_chat_html from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt -from modules.text_generation import clear_torch_cache, generate_reply +from modules.text_generation import (clear_torch_cache, generate_reply, + stop_everything_event) # Loading custom settings settings_file = None @@ -366,7 +367,7 @@ def create_interface(): gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events, queue=False) + shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream) shared.gradio['Replace last reply'].click(chat.replace_last_reply, [shared.gradio['textbox'], shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'], show_progress=shared.args.no_stream) @@ -437,7 +438,7 @@ def create_interface(): output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") else: @@ -471,7 +472,7 @@ def create_interface(): gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") with gr.Tab("Interface mode", elem_id="interface-mode"): From 
c07bcd0850ce0312826f6195450a3b04eb1788f8 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 09:41:06 -0700 Subject: [PATCH 26/80] add some outputs to indicate progress updates (sorta) Actual progressbar still needed. Also minor formatting fixes. --- modules/training.py | 15 ++++++++++++--- server.py | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/modules/training.py b/modules/training.py index aa085fda..b9f3d192 100644 --- a/modules/training.py +++ b/modules/training.py @@ -19,7 +19,7 @@ def create_train_interface(): microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') with gr.Row(): - epochs = gr.Number(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + epochs = gr.Number(label='Epochs', value=1, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') @@ -50,6 +50,7 @@ def cleanPath(basePath: str, path: str): return f'{Path(basePath).absolute()}/{path}' def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): + yield "Prepping..." 
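# [Editor's note: illustrative sketch, not part of the patch series.]
# The yield statements this patch adds to do_train() rely on Gradio treating a
# generator event handler as a stream: each yielded string refreshes the bound
# output component (the gr.Markdown status field wired up via
# startButton.click(do_train, [...], [output])). A minimal, hypothetical
# reproduction of that pattern — the names below are examples, not repo code:
import time

import gradio as gr

def long_job():
    yield "Prepping..."   # stands in for dataset loading / model prep
    time.sleep(1)
    yield "Running..."    # stands in for trainer.train()
    time.sleep(1)
    yield "Done!"

with gr.Blocks() as demo:
    start = gr.Button("Start")
    status = gr.Markdown("Ready")
    start.click(long_job, [], [status])

# demo.queue().launch()  # streamed generator output may require the queue to be enabled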
# Input validation / processing # TODO: --lora-dir PR once pulled will need to be applied here loraName = f"loras/{cleanPath(None, loraName)}" @@ -80,6 +81,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le def generate_and_tokenize_prompt(data_point): prompt = generate_prompt(data_point) return tokenize(prompt) + print("Loading datasets...") data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) if evalDataset == 'None': @@ -89,7 +91,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) # Start prepping the model itself if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): + print("Getting model ready...") prepare_model_for_int8_training(shared.model) + print("Prepping for training...") config = LoraConfig( r=loraRank, lora_alpha=loraAlpha, @@ -121,7 +125,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le save_total_limit=3, load_best_model_at_end=True if evalData is not None else False, # TODO: Enable multi-device support - ddp_find_unused_parameters=None, + ddp_find_unused_parameters=None ), data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), ) @@ -133,6 +137,11 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le if torch.__version__ >= "2" and sys.platform != "win32": loraModel = torch.compile(loraModel) # Actually start and run and save at the end + # TODO: save/load checkpoints to resume from? + print("Starting training...") + yield "Running..." trainer.train() + print("Training complete, saving...") loraModel.save_pretrained(loraName) - return "Done!" + print("Training complete!") + yield f"Done! Lora saved to `{loraName}`" diff --git a/server.py b/server.py index 0e512c7b..caca85c9 100644 --- a/server.py +++ b/server.py @@ -490,7 +490,7 @@ def create_interface(): shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'
Reloading...
\'; setTimeout(function(){location.reload()},2500)}') - + if shared.args.extensions is not None: extensions_module.create_extensions_block() From 268abd1cbabf00ca841efc85211428b3a7f54680 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 13:52:12 -0300 Subject: [PATCH 27/80] Add some space in notebook mode --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 9f90c79b..36083cc1 100644 --- a/server.py +++ b/server.py @@ -426,7 +426,7 @@ def create_interface(): pass with gr.Column(scale=1): - gr.Markdown("\n") + gr.HTML('
') shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) create_prompt_menus() From 8fc723fc95d82755ae9280d9a8fe8b6feb804b1e Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 10:25:08 -0700 Subject: [PATCH 28/80] initial progress tracker in UI --- modules/training.py | 48 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/modules/training.py b/modules/training.py index b9f3d192..c83427d6 100644 --- a/modules/training.py +++ b/modules/training.py @@ -1,4 +1,4 @@ -import sys, torch, json +import sys, torch, json, threading, time from pathlib import Path import gradio as gr from datasets import load_dataset @@ -6,6 +6,9 @@ import transformers from modules import ui, shared from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict +CURRENT_STEPS = 0 +MAX_STEPS = 0 + def get_json_dataset(path: str): def get_set(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) @@ -40,6 +43,12 @@ def create_train_interface(): output = gr.Markdown(value="(...)") startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) +class Callbacks(transformers.TrainerCallback): + def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): + global CURRENT_STEPS, MAX_STEPS + CURRENT_STEPS = state.global_step + MAX_STEPS = state.max_steps + def cleanPath(basePath: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. @@ -50,8 +59,11 @@ def cleanPath(basePath: str, path: str): return f'{Path(basePath).absolute()}/{path}' def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): + global CURRENT_STEPS, MAX_STEPS + CURRENT_STEPS = 0 + MAX_STEPS = 0 yield "Prepping..." 
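# [Editor's note: illustrative sketch, not part of the patch series.]
# The progress tracker added here leans on the transformers callback API: a
# TrainerCallback receives the TrainerState on every optimizer step and can
# mirror global_step / max_steps into module-level counters that the UI's
# polling loop reads. A pared-down sketch of that idea (Trainer construction
# omitted):
import transformers

CURRENT_STEPS = 0
MAX_STEPS = 0

class ProgressCallback(transformers.TrainerCallback):
    def on_step_begin(self, args, state, control, **kwargs):
        # state.global_step counts optimizer steps taken so far;
        # state.max_steps is the planned total for the run.
        global CURRENT_STEPS, MAX_STEPS
        CURRENT_STEPS = state.global_step
        MAX_STEPS = state.max_steps

# The callback is handed to the trainer, e.g.:
# trainer = transformers.Trainer(..., callbacks=[ProgressCallback()])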
- # Input validation / processing + # == Input validation / processing == # TODO: --lora-dir PR once pulled will need to be applied here loraName = f"loras/{cleanPath(None, loraName)}" if dataset is None: @@ -62,7 +74,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le actualLR = float(learningRate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" - # Prep the dataset, format, etc + # == Prep the dataset, format, etc == with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: formatData: dict[str, str] = json.load(formatFile) def tokenize(prompt): @@ -89,7 +101,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le else: evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) - # Start prepping the model itself + # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): print("Getting model ready...") prepare_model_for_int8_training(shared.model) @@ -128,6 +140,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le ddp_find_unused_parameters=None ), data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), + callbacks=list([Callbacks()]) ) loraModel.config.use_cache = False old_state_dict = loraModel.state_dict @@ -136,12 +149,31 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le ).__get__(loraModel, type(loraModel)) if torch.__version__ >= "2" and sys.platform != "win32": loraModel = torch.compile(loraModel) - # Actually start and run and save at the end + # == Main run and monitor loop == # TODO: save/load checkpoints to resume from? print("Starting training...") - yield "Running..." - trainer.train() + yield "Starting..." + def threadedRun(): + trainer.train() + thread = threading.Thread(target=threadedRun) + thread.start() + lastStep = 0 + startTime = time.perf_counter() + while thread.is_alive(): + time.sleep(0.5) + if CURRENT_STEPS != lastStep: + lastStep = CURRENT_STEPS + timeElapsed = time.perf_counter() - startTime + if timeElapsed <= 0: + timerInfo = "" + else: + its = CURRENT_STEPS / timeElapsed + if its > 1: + timerInfo = f"`{its:.2f}` it/s" + else: + timerInfo = f"`{1.0/its:.2f}` s/it" + yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.1f}` seconds" print("Training complete, saving...") loraModel.save_pretrained(loraName) print("Training complete!") - yield f"Done! Lora saved to `{loraName}`" + yield f"Done! 
LoRA saved to `{loraName}`" From 16ea4fc36df9ec0cde796eaecf22db64c4d91fd8 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 10:43:01 -0700 Subject: [PATCH 29/80] interrupt button --- modules/training.py | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/modules/training.py b/modules/training.py index c83427d6..19f33220 100644 --- a/modules/training.py +++ b/modules/training.py @@ -6,8 +6,10 @@ import transformers from modules import ui, shared from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict +WANT_INTERRUPT = False CURRENT_STEPS = 0 MAX_STEPS = 0 +CURRENT_GRADIENT_ACCUM = 1 def get_json_dataset(path: str): def get_set(): @@ -39,15 +41,31 @@ def create_train_interface(): formatsFunction = get_json_dataset('training/formats') format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') - startButton = gr.Button("Start LoRA Training") + with gr.Row(): + startButton = gr.Button("Start LoRA Training") + stopButton = gr.Button("Interrupt") output = gr.Markdown(value="(...)") - startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) + startEvent = startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) + stopButton.click(doInterrupt, [], [], cancels=[], queue=False) + +def doInterrupt(): + global WANT_INTERRUPT + WANT_INTERRUPT = True class Callbacks(transformers.TrainerCallback): def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): global CURRENT_STEPS, MAX_STEPS - CURRENT_STEPS = state.global_step - MAX_STEPS = state.max_steps + CURRENT_STEPS = state.global_step * CURRENT_GRADIENT_ACCUM + MAX_STEPS = state.max_steps * CURRENT_GRADIENT_ACCUM + if WANT_INTERRUPT: + control.should_epoch_stop = True + control.should_training_stop = True + def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): + global CURRENT_STEPS + CURRENT_STEPS += 1 + if WANT_INTERRUPT: + control.should_epoch_stop = True + control.should_training_stop = True def cleanPath(basePath: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" @@ -59,7 +77,8 @@ def cleanPath(basePath: str, path: str): return f'{Path(basePath).absolute()}/{path}' def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): - global CURRENT_STEPS, MAX_STEPS + global WANT_INTERRUPT, CURRENT_STEPS, MAX_STEPS, CURRENT_GRADIENT_ACCUM + WANT_INTERRUPT = False CURRENT_STEPS = 0 MAX_STEPS = 0 yield "Prepping..." 
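# [Editor's note: illustrative sketch, not part of the patch series.]
# The Interrupt button added in this patch is cooperative rather than forceful:
# the click handler only flips a module-level flag, and the TrainerCallback asks
# the Trainer to stop at the next step boundary through the TrainerControl
# object — which is why the status text warns that the run stops only after the
# current training step completes. A reduced sketch of that handshake:
import transformers

WANT_INTERRUPT = False

def request_interrupt():
    # Bound to the "Interrupt" button; returns immediately.
    global WANT_INTERRUPT
    WANT_INTERRUPT = True

class InterruptibleCallback(transformers.TrainerCallback):
    def on_step_begin(self, args, state, control, **kwargs):
        if WANT_INTERRUPT:
            control.should_epoch_stop = True
            control.should_training_stop = True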
@@ -71,6 +90,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le if format is None: return "**Missing format choice input, cannot continue.**" gradientAccumulationSteps = batchSize // microBatchSize + CURRENT_GRADIENT_ACCUM = gradientAccumulationSteps actualLR = float(learningRate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" @@ -161,7 +181,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le startTime = time.perf_counter() while thread.is_alive(): time.sleep(0.5) - if CURRENT_STEPS != lastStep: + if WANT_INTERRUPT: + yield "Interrupting, please wait... *(Run will stop after the current training step completes.)*" + elif CURRENT_STEPS != lastStep: lastStep = CURRENT_STEPS timeElapsed = time.perf_counter() - startTime if timeElapsed <= 0: @@ -175,5 +197,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.1f}` seconds" print("Training complete, saving...") loraModel.save_pretrained(loraName) - print("Training complete!") - yield f"Done! LoRA saved to `{loraName}`" + if WANT_INTERRUPT: + print("Training interrupted.") + yield f"Interrupted. Incomplete LoRA saved to `{loraName}`" + else: + print("Training complete!") + yield f"Done! LoRA saved to `{loraName}`" From 641e1a09a746f3c1172d0a60fb68067d27d903c0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 14:48:43 -0300 Subject: [PATCH 30/80] Don't flash when selecting a new prompt --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 36083cc1..8b26a90f 100644 --- a/server.py +++ b/server.py @@ -157,7 +157,7 @@ def create_prompt_menus(): shared.gradio['save_prompt'] = gr.Button('Save prompt') shared.gradio['status'] = gr.Markdown('Ready') - shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=True) + shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=False) shared.gradio['save_prompt'].click(save_prompt, [shared.gradio['textbox']], [shared.gradio['status']], show_progress=False) def create_settings_menus(default_preset): From 9ced75746de1335e383626d996ced7d0d17e489b Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 10:57:27 -0700 Subject: [PATCH 31/80] add total time estimate --- modules/training.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 19f33220..f8846049 100644 --- a/modules/training.py +++ b/modules/training.py @@ -188,13 +188,15 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le timeElapsed = time.perf_counter() - startTime if timeElapsed <= 0: timerInfo = "" + totalTimeEstimate = 999 else: its = CURRENT_STEPS / timeElapsed if its > 1: timerInfo = f"`{its:.2f}` it/s" else: timerInfo = f"`{1.0/its:.2f}` s/it" - yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.1f}` seconds" + totalTimeEstimate = (1.0/its) * (MAX_STEPS) + yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... 
{timerInfo}, `{timeElapsed:.0f}`/`{totalTimeEstimate:.0f}` seconds" print("Training complete, saving...") loraModel.save_pretrained(loraName) if WANT_INTERRUPT: From 9ec6c56680e96e59b12f68199827c1f8b9510f38 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 15:12:43 -0300 Subject: [PATCH 32/80] Update stale.yml --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 82cd1701..ce603a4f 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/stale@v5 with: stale-issue-message: "" - close-issue-message: "This issue has been closed due to inactivity for 30 days. If you believe it is still relevant, you can reopen it (if you are the author) or leave a comment below." + close-issue-message: "This issue has been closed due to inactivity for 30 days. If you believe it is still relevant, please leave a comment below." days-before-issue-stale: 30 days-before-issue-close: 0 stale-issue-label: "stale" From 9c96919121ca281fb7857c0a503e6141ff94ba84 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:05:19 +0000 Subject: [PATCH 33/80] Bump bitsandbytes from 0.37.1 to 0.37.2 Bumps [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) from 0.37.1 to 0.37.2. - [Release notes](https://github.com/TimDettmers/bitsandbytes/releases) - [Changelog](https://github.com/TimDettmers/bitsandbytes/blob/main/CHANGELOG.md) - [Commits](https://github.com/TimDettmers/bitsandbytes/commits) --- updated-dependencies: - dependency-name: bitsandbytes dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c84f2948..bfec2c9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ accelerate==0.17.1 -bitsandbytes==0.37.1 +bitsandbytes==0.37.2 flexgen==0.1.7 gradio==3.23.0 markdown From e9c0226b092025173ac368d3af3992561b3edef3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:05:35 +0000 Subject: [PATCH 34/80] Bump rwkv from 0.7.0 to 0.7.1 Bumps [rwkv](https://github.com/BlinkDL/ChatRWKV) from 0.7.0 to 0.7.1. - [Release notes](https://github.com/BlinkDL/ChatRWKV/releases) - [Commits](https://github.com/BlinkDL/ChatRWKV/commits) --- updated-dependencies: - dependency-name: rwkv dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c84f2948..257a29b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ markdown numpy peft==0.2.0 requests -rwkv==0.7.0 +rwkv==0.7.1 safetensors==0.3.0 sentencepiece tqdm From 2f0571bfa4a17300113b3e91f422cc8aa5471b4d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:24:39 -0300 Subject: [PATCH 35/80] Small style changes --- css/main.css | 2 +- modules/training.py | 23 ++++++++++++++++++----- server.py | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/css/main.css b/css/main.css index 3f044094..6aa3bc1a 100644 --- a/css/main.css +++ b/css/main.css @@ -41,7 +41,7 @@ ol li p, ul li p { display: inline-block; } -#main, #parameters, #chat-settings, #interface-mode, #lora { +#main, #parameters, #chat-settings, #interface-mode, #lora, #training-tab { border: 0; } diff --git a/modules/training.py b/modules/training.py index f8846049..bc5b3878 100644 --- a/modules/training.py +++ b/modules/training.py @@ -1,10 +1,17 @@ -import sys, torch, json, threading, time +import json +import sys +import threading +import time from pathlib import Path + import gradio as gr -from datasets import load_dataset +import torch import transformers -from modules import ui, shared -from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict +from datasets import load_dataset +from peft import (LoraConfig, get_peft_model, get_peft_model_state_dict, + prepare_model_for_int8_training) + +from modules import shared, ui WANT_INTERRUPT = False CURRENT_STEPS = 0 @@ -44,7 +51,7 @@ def create_train_interface(): with gr.Row(): startButton = gr.Button("Start LoRA Training") stopButton = gr.Button("Interrupt") - output = gr.Markdown(value="(...)") + output = gr.Markdown(value="Ready") startEvent = startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) stopButton.click(doInterrupt, [], [], cancels=[], queue=False) @@ -169,16 +176,20 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le ).__get__(loraModel, type(loraModel)) if torch.__version__ >= "2" and sys.platform != "win32": loraModel = torch.compile(loraModel) + # == Main run and monitor loop == # TODO: save/load checkpoints to resume from? print("Starting training...") yield "Starting..." + def threadedRun(): trainer.train() + thread = threading.Thread(target=threadedRun) thread.start() lastStep = 0 startTime = time.perf_counter() + while thread.is_alive(): time.sleep(0.5) if WANT_INTERRUPT: @@ -197,8 +208,10 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le timerInfo = f"`{1.0/its:.2f}` s/it" totalTimeEstimate = (1.0/its) * (MAX_STEPS) yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.0f}`/`{totalTimeEstimate:.0f}` seconds" + print("Training complete, saving...") loraModel.save_pretrained(loraName) + if WANT_INTERRUPT: print("Training interrupted.") yield f"Interrupted. 
Incomplete LoRA saved to `{loraName}`" diff --git a/server.py b/server.py index cf37dc50..c3c8d2c8 100644 --- a/server.py +++ b/server.py @@ -9,8 +9,8 @@ from pathlib import Path import gradio as gr -from modules import chat, shared, ui, training import modules.extensions as extensions_module +from modules import chat, shared, training, ui from modules.html_generator import generate_chat_html from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt From 6368dad7dbf4a85d840930548bce5a28714f65e5 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 18:17:42 -0700 Subject: [PATCH 36/80] Fix camelCase to snake_case to match repo format standard --- modules/training.py | 132 +++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 58 deletions(-) diff --git a/modules/training.py b/modules/training.py index bc5b3878..f63f2990 100644 --- a/modules/training.py +++ b/modules/training.py @@ -25,35 +25,40 @@ def get_json_dataset(path: str): def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): - loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") + lora_name = gr.Textbox(label="Name", info="The name of your new LoRA file") with gr.Row(): # TODO: Implement multi-device support. - microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batch_size = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + with gr.Row(): epochs = gr.Number(label='Epochs', value=1, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') - learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. 
Higher ranks also require higher VRAM.') - loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') # TODO: Better explain what this does, in terms of real world effect especially. - loraDropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') - cutoffLen = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') + cutoff_len = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + with gr.Row(): - datasetFunction = get_json_dataset('training/datasets') - dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') - ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - formatsFunction = get_json_dataset('training/formats') - format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') + dataset_function = get_json_dataset('training/datasets') + dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') + formats_function = get_json_dataset('training/formats') + format = gr.Dropdown(choices=formats_function(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + 
ui.create_refresh_button(format, lambda : None, lambda : {'choices': formats_function()}, 'refresh-button') + with gr.Row(): - startButton = gr.Button("Start LoRA Training") - stopButton = gr.Button("Interrupt") + start_button = gr.Button("Start LoRA Training") + stop_button = gr.Button("Interrupt") + output = gr.Markdown(value="Ready") - startEvent = startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) - stopButton.click(doInterrupt, [], [], cancels=[], queue=False) + startEvent = start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) + stop_button.click(doInterrupt, [], [], cancels=[], queue=False) def doInterrupt(): global WANT_INTERRUPT @@ -74,108 +79,119 @@ class Callbacks(transformers.TrainerCallback): control.should_epoch_stop = True control.should_training_stop = True -def cleanPath(basePath: str, path: str): +def cleanPath(base_path: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. # Or swap it to a strict whitelist of [a-zA-Z_0-9] path = path.replace('\\', '/').replace('..', '_') - if basePath is None: + if base_path is None: return path - return f'{Path(basePath).absolute()}/{path}' + return f'{Path(base_path).absolute()}/{path}' -def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): +def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: float, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str): global WANT_INTERRUPT, CURRENT_STEPS, MAX_STEPS, CURRENT_GRADIENT_ACCUM WANT_INTERRUPT = False CURRENT_STEPS = 0 MAX_STEPS = 0 - yield "Prepping..." + # == Input validation / processing == + yield "Prepping..." 
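# [Editor's note: illustrative sketch, not part of the patch series.]
# clean_path() (renamed from cleanPath here) is a light sanitizer: it normalizes
# backslashes and neuters ".." before anchoring the name under the intended
# directory; the patch's own TODO notes it still deserves a security audit.
# A standalone copy of the same logic, showing what a hostile-looking input
# becomes:
from pathlib import Path

def clean_path_sketch(base_path, path):
    path = path.replace('\\', '/').replace('..', '_')
    if base_path is None:
        return path
    return f'{Path(base_path).absolute()}/{path}'

print(clean_path_sketch('training/datasets', '../../etc/passwd'))
# -> "<absolute training/datasets>/_/_/etc/passwd": the ".." hops are replaced
#    with "_", so the resolved name stays under the datasets directory.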
# TODO: --lora-dir PR once pulled will need to be applied here - loraName = f"loras/{cleanPath(None, loraName)}" + lora_name = f"loras/{cleanPath(None, lora_name)}" if dataset is None: return "**Missing dataset choice input, cannot continue.**" if format is None: return "**Missing format choice input, cannot continue.**" - gradientAccumulationSteps = batchSize // microBatchSize - CURRENT_GRADIENT_ACCUM = gradientAccumulationSteps - actualLR = float(learningRate) + gradient_accumulation_steps = batch_size // micro_batch_size + CURRENT_GRADIENT_ACCUM = gradient_accumulation_steps + actual_lr = float(learning_rate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" + # == Prep the dataset, format, etc == with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: - formatData: dict[str, str] = json.load(formatFile) + format_data: dict[str, str] = json.load(formatFile) + def tokenize(prompt): - result = shared.tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") + result = shared.tokenizer(prompt, truncation=True, max_length=cutoff_len + 1, padding="max_length") return { "input_ids": result["input_ids"][:-1], "attention_mask": result["attention_mask"][:-1], } + def generate_prompt(data_point: dict[str, str]): - for options, data in formatData.items(): + for options, data in format_data.items(): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): for key, val in data_point.items(): data = data.replace(f'%{key}%', val) return data - raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(formatData.keys())}"') + raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"') + def generate_and_tokenize_prompt(data_point): prompt = generate_prompt(data_point) return tokenize(prompt) + print("Loading datasets...") data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) - if evalDataset == 'None': - evalData = None + + if eval_dataset == 'None': + eval_data = None else: - evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) - evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) + eval_data = load_dataset("json", data_files=cleanPath('training/datasets', f'{eval_dataset}.json')) + eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) + # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): print("Getting model ready...") prepare_model_for_int8_training(shared.model) + print("Prepping for training...") config = LoraConfig( - r=loraRank, - lora_alpha=loraAlpha, + r=lora_rank, + lora_alpha=lora_alpha, # TODO: Should target_modules be configurable? 
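# [Editor's note: illustrative sketch, not part of the patch series.]
# As the "LoRA Alpha" slider text earlier in the series explains, alpha divided
# by rank becomes the scaling applied to the LoRA weights. With the UI defaults
# that flow into this LoraConfig(r=lora_rank, lora_alpha=lora_alpha, ...):
lora_rank = 8     # "LoRA Rank" slider default
lora_alpha = 16   # "LoRA Alpha" slider default

lora_scaling = lora_alpha / lora_rank
print(lora_scaling)  # -> 2.0, matching the suggested "twice your Rank" rule of thumb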
target_modules=[ "q_proj", "v_proj" ], - lora_dropout=loraDropout, + lora_dropout=lora_dropout, bias="none", task_type="CAUSAL_LM" ) - loraModel = get_peft_model(shared.model, config) + lora_model = get_peft_model(shared.model, config) trainer = transformers.Trainer( - model=loraModel, + model=lora_model, train_dataset=train_data, - eval_dataset=evalData, + eval_dataset=eval_data, args=transformers.TrainingArguments( - per_device_train_batch_size=microBatchSize, - gradient_accumulation_steps=gradientAccumulationSteps, + per_device_train_batch_size=micro_batch_size, + gradient_accumulation_steps=gradient_accumulation_steps, # TODO: Should more of these be configurable? Probably. warmup_steps=100, num_train_epochs=epochs, - learning_rate=actualLR, + learning_rate=actual_lr, fp16=True, logging_steps=20, - evaluation_strategy="steps" if evalData is not None else "no", + evaluation_strategy="steps" if eval_data is not None else "no", save_strategy="steps", - eval_steps=200 if evalData is not None else None, + eval_steps=200 if eval_data is not None else None, save_steps=200, - output_dir=loraName, + output_dir=lora_name, save_total_limit=3, - load_best_model_at_end=True if evalData is not None else False, + load_best_model_at_end=True if eval_data is not None else False, # TODO: Enable multi-device support ddp_find_unused_parameters=None ), data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), callbacks=list([Callbacks()]) ) - loraModel.config.use_cache = False - old_state_dict = loraModel.state_dict - loraModel.state_dict = ( + + lora_model.config.use_cache = False + old_state_dict = lora_model.state_dict + lora_model.state_dict = ( lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict()) - ).__get__(loraModel, type(loraModel)) + ).__get__(lora_model, type(lora_model)) + if torch.__version__ >= "2" and sys.platform != "win32": - loraModel = torch.compile(loraModel) + lora_model = torch.compile(lora_model) # == Main run and monitor loop == # TODO: save/load checkpoints to resume from? @@ -210,11 +226,11 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.0f}`/`{totalTimeEstimate:.0f}` seconds" print("Training complete, saving...") - loraModel.save_pretrained(loraName) + lora_model.save_pretrained(lora_name) if WANT_INTERRUPT: print("Training interrupted.") - yield f"Interrupted. Incomplete LoRA saved to `{loraName}`" + yield f"Interrupted. Incomplete LoRA saved to `{lora_name}`" else: print("Training complete!") - yield f"Done! LoRA saved to `{loraName}`" + yield f"Done! 
LoRA saved to `{lora_name}`" From 7fab7ea1b64a262f63535d06b5e418910bed7edd Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 18:19:06 -0700 Subject: [PATCH 37/80] couple missed camelCases --- modules/training.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/training.py b/modules/training.py index f63f2990..e3976d8f 100644 --- a/modules/training.py +++ b/modules/training.py @@ -58,9 +58,9 @@ def create_train_interface(): output = gr.Markdown(value="Ready") startEvent = start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) - stop_button.click(doInterrupt, [], [], cancels=[], queue=False) + stop_button.click(do_interrupt, [], [], cancels=[], queue=False) -def doInterrupt(): +def do_interrupt(): global WANT_INTERRUPT WANT_INTERRUPT = True @@ -79,7 +79,7 @@ class Callbacks(transformers.TrainerCallback): control.should_epoch_stop = True control.should_training_stop = True -def cleanPath(base_path: str, path: str): +def clean_path(base_path: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. # Or swap it to a strict whitelist of [a-zA-Z_0-9] @@ -97,7 +97,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int # == Input validation / processing == yield "Prepping..." # TODO: --lora-dir PR once pulled will need to be applied here - lora_name = f"loras/{cleanPath(None, lora_name)}" + lora_name = f"loras/{clean_path(None, lora_name)}" if dataset is None: return "**Missing dataset choice input, cannot continue.**" if format is None: @@ -109,7 +109,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int shared.tokenizer.padding_side = "left" # == Prep the dataset, format, etc == - with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: format_data: dict[str, str] = json.load(formatFile) def tokenize(prompt): @@ -132,13 +132,13 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int return tokenize(prompt) print("Loading datasets...") - data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) + data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) if eval_dataset == 'None': eval_data = None else: - eval_data = load_dataset("json", data_files=cleanPath('training/datasets', f'{eval_dataset}.json')) + eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json')) eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) # == Start prepping the model itself == From 1e02f75f2bc7917de83b89f2d04de1df157f01e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 01:19:34 +0000 Subject: [PATCH 38/80] Bump accelerate from 0.17.1 to 0.18.0 Bumps [accelerate](https://github.com/huggingface/accelerate) from 0.17.1 to 0.18.0. 
- [Release notes](https://github.com/huggingface/accelerate/releases) - [Commits](https://github.com/huggingface/accelerate/compare/v0.17.1...v0.18.0) --- updated-dependencies: - dependency-name: accelerate dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0c6ed793..77557a74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==0.17.1 +accelerate==0.18.0 bitsandbytes==0.37.2 flexgen==0.1.7 gradio==3.23.0 From 8a97f6ba293228f7e33fd96670db81b0d2001f23 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 18:39:06 -0700 Subject: [PATCH 39/80] corrections per the PR comments --- modules/training.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/modules/training.py b/modules/training.py index e3976d8f..52ecc55e 100644 --- a/modules/training.py +++ b/modules/training.py @@ -19,9 +19,7 @@ MAX_STEPS = 0 CURRENT_GRADIENT_ACCUM = 1 def get_json_dataset(path: str): - def get_set(): - return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) - return get_set + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): @@ -32,7 +30,7 @@ def create_train_interface(): batch_size = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') with gr.Row(): - epochs = gr.Number(label='Epochs', value=1, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. @@ -43,21 +41,19 @@ def create_train_interface(): cutoff_len = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') with gr.Row(): - dataset_function = get_json_dataset('training/datasets') - dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') - eval_dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') - ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') - formats_function = get_json_dataset('training/formats') - format = gr.Dropdown(choices=formats_function(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda : None, lambda : {'choices': formats_function()}, 'refresh-button') + dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') + format = gr.Dropdown(choices=get_json_dataset('training/formats'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + ui.create_refresh_button(format, lambda : None, lambda : {'choices': get_json_dataset('training/formats')}, 'refresh-button') with gr.Row(): start_button = gr.Button("Start LoRA Training") stop_button = gr.Button("Interrupt") output = gr.Markdown(value="Ready") - startEvent = start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) + start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) stop_button.click(do_interrupt, [], [], cancels=[], queue=False) def do_interrupt(): From 005f552ea311e9bf932b91337da101a490bdd5ff Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:29:52 -0300 Subject: [PATCH 40/80] Some simplifications --- modules/shared.py | 6 +++--- server.py | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index d9bcf241..71829a01 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -107,14 +107,14 @@ parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile t parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.') parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example. If you create a file called settings.json, this file will be loaded by default without the need to use the --settings flag.') parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. 
If you want to load more than one extension, write the names separated by spaces.') +parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") +parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") +parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.') parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.') parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') -parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3"', default=None) -parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") -parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") args = parser.parse_args() # Provisional, this will be deleted later diff --git a/server.py b/server.py index 15aa84bb..66f60074 100644 --- a/server.py +++ b/server.py @@ -498,16 +498,21 @@ def create_interface(): if shared.args.extensions is not None: extensions_module.create_extensions_block() + # Authentication + auth = None + if shared.args.gradio_auth_path is not None: + gradio_auth_creds = [] + with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: + for line in file.readlines(): + gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] + auth = [tuple(cred.split(':')) for cred in gradio_auth_creds] + # Launch the interface - gradio_auth_creds = [] - with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: - for line in file.readlines(): - gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] shared.gradio['interface'].queue() if shared.args.listen: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=auth) else: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=auth) create_interface() From 30585b3e716e646ffabb8d590e5fe3b53863656d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:35:01 -0300 Subject: [PATCH 41/80] Update README --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md 
index 3bfbc72f..cd75284c 100644 --- a/README.md +++ b/README.md @@ -198,12 +198,15 @@ Optionally, you can use the following command-line flags: | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | | `--no-stream` | Don't stream the text output in real time. | | `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag.| -| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. | -| `--listen` | Make the web UI reachable from your local network.| -| `--listen-port LISTEN_PORT` | The listening port that the server will use. | -| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | -| `--auto-launch` | Open the web UI in the default browser upon launch. | -| `--verbose` | Print the prompts to the terminal. | +| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. | +| `--model-dir MODEL_DIR` | Path to directory with all the models | +| `--lora-dir LORA_DIR` | Path to directory with all the loras | +| `--verbose` | Print the prompts to the terminal. | +| `--listen` | Make the web UI reachable from your local network. | +| `--listen-port LISTEN_PORT` | The listening port that the server will use. | +| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | +| `--auto-launch` | Open the web UI in the default browser upon launch. | +| `--gradio-auth-path GRADIO_AUTH_PATH` | set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3" | Out of memory errors? [Check the low VRAM guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide). From 036163a75134ba88d83754548b992331d2b450f5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:39:26 -0300 Subject: [PATCH 42/80] Change description --- README.md | 2 +- modules/shared.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cd75284c..f6b1d4f5 100644 --- a/README.md +++ b/README.md @@ -206,7 +206,7 @@ Optionally, you can use the following command-line flags: | `--listen-port LISTEN_PORT` | The listening port that the server will use. | | `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | | `--auto-launch` | Open the web UI in the default browser upon launch. | -| `--gradio-auth-path GRADIO_AUTH_PATH` | set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3" | +| `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" | Out of memory errors? [Check the low VRAM guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide). 
diff --git a/modules/shared.py b/modules/shared.py index 71829a01..ac9d750c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -114,7 +114,7 @@ parser.add_argument('--listen', action='store_true', help='Make the web UI reach parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.') parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') -parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--gradio-auth-path", type=str, help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"', default=None) args = parser.parse_args() # Provisional, this will be deleted later From ee95e55df67468902fc411bbfc51bb961d1953d2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:42:29 -0300 Subject: [PATCH 43/80] Fix RWKV tokenizer --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index 5aaef800..26a10f7a 100644 --- a/modules/models.py +++ b/modules/models.py @@ -90,7 +90,7 @@ def load_model(model_name): from modules.RWKV import RWKVModel, RWKVTokenizer model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") - tokenizer = RWKVTokenizer.from_pretrained(Path(shared.model_name)) + tokenizer = RWKVTokenizer.from_pretrained(Path(shared.args.model_dir)) return model, tokenizer From 53da672315d3914b1af728274f0223e7bac60b7a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:44:21 -0300 Subject: [PATCH 44/80] Fix FlexGen --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index 26a10f7a..a6839318 100644 --- a/modules/models.py +++ b/modules/models.py @@ -76,7 +76,7 @@ def load_model(model_name): num_bits=4, group_size=64, group_dim=2, symmetric=False)) - model = OptLM(f"facebook/{shared.model_name}", env, shared.model_name, policy) + model = OptLM(f"facebook/{shared.model_name}", env, shared.args.model_dir, policy) # DeepSpeed ZeRO-3 elif shared.args.deepspeed: From ec6224f5561ce200ef8c98f967c3f6edafd2ffb0 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 20:04:16 -0700 Subject: [PATCH 45/80] use new shared.args.lora_dir --- modules/training.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/training.py b/modules/training.py index 52ecc55e..0d54a251 100644 --- a/modules/training.py +++ b/modules/training.py @@ -92,8 +92,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int # == Input validation / processing == yield "Prepping..." 
- # TODO: --lora-dir PR once pulled will need to be applied here - lora_name = f"loras/{clean_path(None, lora_name)}" + lora_name = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}" if dataset is None: return "**Missing dataset choice input, cannot continue.**" if format is None: From b749952fe3de309ca1b5ec98fe114608be4c8dce Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 21:22:43 -0700 Subject: [PATCH 46/80] change number minimums to 0 gradio calculates 'step' relative to the minimum, so at '1' the step values were all offset awkwardly. 0 isn't valid, but, uh, just don't slam the slider to the left. --- modules/training.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/training.py b/modules/training.py index 0d54a251..656a8b3a 100644 --- a/modules/training.py +++ b/modules/training.py @@ -27,18 +27,18 @@ def create_train_interface(): with gr.Row(): # TODO: Implement multi-device support. micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batch_size = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + batch_size = gr.Slider(label='Batch Size', value=128, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') with gr.Row(): epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') - lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. 
Higher means stronger. A good standard value is twice your Rank.') # TODO: Better explain what this does, in terms of real world effect especially. lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') - cutoff_len = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') with gr.Row(): dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Dataset', info='The dataset file to use for training.') From 2e08af4edf07b5b79f3e105c0be892e518da28bd Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:15:32 -0700 Subject: [PATCH 47/80] implement initial Raw Text File Input also bump default Rank & Alpha for values that will make sense in testing if you don't know what you're doing and leave the defaults. --- modules/training.py | 116 +++++++++++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 40 deletions(-) diff --git a/modules/training.py b/modules/training.py index 656a8b3a..1949fa4e 100644 --- a/modules/training.py +++ b/modules/training.py @@ -7,7 +7,7 @@ from pathlib import Path import gradio as gr import torch import transformers -from datasets import load_dataset +from datasets import Dataset, load_dataset from peft import (LoraConfig, get_peft_model, get_peft_model_state_dict, prepare_model_for_int8_training) @@ -18,8 +18,8 @@ CURRENT_STEPS = 0 MAX_STEPS = 0 CURRENT_GRADIENT_ACCUM = 1 -def get_json_dataset(path: str): - return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) +def get_dataset(path: str, ext: str): + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob(f'*.{ext}'))), key=str.lower) def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): @@ -40,20 +40,26 @@ def create_train_interface(): lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') - with gr.Row(): - dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') - eval_dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') - ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') - format = gr.Dropdown(choices=get_json_dataset('training/formats'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda : None, lambda : {'choices': get_json_dataset('training/formats')}, 'refresh-button') + with gr.Tab(label="Formatted Dataset"): + with gr.Row(): + dataset = gr.Dropdown(choices=get_dataset('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'json')}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=get_dataset('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'json')}, 'refresh-button') + format = gr.Dropdown(choices=get_dataset('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + ui.create_refresh_button(format, lambda : None, lambda : {'choices': get_dataset('training/formats', 'json')}, 'refresh-button') + with gr.Tab(label="Raw Text File"): + with gr.Row(): + raw_text_file = gr.Dropdown(choices=get_dataset('training/datasets', 'txt'), value='None', label='Text File', info='The raw text file to use for training.') + ui.create_refresh_button(raw_text_file, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'txt')}, 'refresh-button') + overlap_len = gr.Slider(label='Overlap Length', minimum=0,maximum=512, value=32, step=8, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. 
(The chunks themselves will be of a size determined by Cutoff Length above)') with gr.Row(): start_button = gr.Button("Start LoRA Training") stop_button = gr.Button("Interrupt") output = gr.Markdown(value="Ready") - start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) + start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, raw_text_file, overlap_len], [output]) stop_button.click(do_interrupt, [], [], cancels=[], queue=False) def do_interrupt(): @@ -84,7 +90,8 @@ def clean_path(base_path: str, path: str): return path return f'{Path(base_path).absolute()}/{path}' -def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: float, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str): +def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lora_rank: int, + lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, raw_text_file: str, overlap_len: int): global WANT_INTERRUPT, CURRENT_STEPS, MAX_STEPS, CURRENT_GRADIENT_ACCUM WANT_INTERRUPT = False CURRENT_STEPS = 0 @@ -93,20 +100,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int # == Input validation / processing == yield "Prepping..." lora_name = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}" - if dataset is None: - return "**Missing dataset choice input, cannot continue.**" - if format is None: - return "**Missing format choice input, cannot continue.**" + actual_lr = float(learning_rate) + + if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0: + yield f"Cannot input zeroes." 
+ return + gradient_accumulation_steps = batch_size // micro_batch_size CURRENT_GRADIENT_ACCUM = gradient_accumulation_steps - actual_lr = float(learning_rate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" - # == Prep the dataset, format, etc == - with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: - format_data: dict[str, str] = json.load(formatFile) - def tokenize(prompt): result = shared.tokenizer(prompt, truncation=True, max_length=cutoff_len + 1, padding="max_length") return { @@ -114,27 +118,55 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int "attention_mask": result["attention_mask"][:-1], } - def generate_prompt(data_point: dict[str, str]): - for options, data in format_data.items(): - if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): - for key, val in data_point.items(): - data = data.replace(f'%{key}%', val) - return data - raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"') - - def generate_and_tokenize_prompt(data_point): - prompt = generate_prompt(data_point) - return tokenize(prompt) - - print("Loading datasets...") - data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) - train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) - - if eval_dataset == 'None': + # == Prep the dataset, format, etc == + if raw_text_file is not None: + print("Loading raw text file dataset...") + with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file: + raw_text = file.read() + tokens = shared.tokenizer.encode(raw_text) + del raw_text # Note: could be a gig for a large dataset, so delete redundant data as we go to be safe on RAM + tokens = list(split_chunks(tokens, cutoff_len - overlap_len)) + for i in range(1, len(tokens)): + tokens[i] = tokens[i - 1][-overlap_len:] + tokens[i] + text_chunks = [shared.tokenizer.decode(x) for x in tokens] + del tokens + data = Dataset.from_list([tokenize(x) for x in text_chunks]) + train_data = data.shuffle() eval_data = None + del text_chunks + else: - eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json')) - eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: + format_data: dict[str, str] = json.load(formatFile) + + if dataset is None: + yield "**Missing dataset choice input, cannot continue.**" + return + if format is None: + yield "**Missing format choice input, cannot continue.**" + return + + def generate_prompt(data_point: dict[str, str]): + for options, data in format_data.items(): + if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): + for key, val in data_point.items(): + data = data.replace(f'%{key}%', val) + return data + raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"') + + def generate_and_tokenize_prompt(data_point): + prompt = generate_prompt(data_point) + return tokenize(prompt) + + print("Loading JSON datasets...") + data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) + train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) + + if eval_dataset == 'None': + eval_data = None + else: + eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json')) 
+ eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): @@ -229,3 +261,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int else: print("Training complete!") yield f"Done! LoRA saved to `{lora_name}`" + +def split_chunks(arr, step): + for i in range(0, len(arr), step): + yield arr[i:i + step] From 9cc811a0e6abbc32ef5699255db4127740ea1e8d Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:16:40 -0700 Subject: [PATCH 48/80] fix LoRA path typo in #549 --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 311a624f..4d9ee5e8 100644 --- a/server.py +++ b/server.py @@ -55,7 +55,7 @@ def get_available_softprompts(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) def get_available_loras(): - return ['None'] + sorted([item.name for item in list(Path('shared.args.lora_dir').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return ['None'] + sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def unload_model(): shared.model = shared.tokenizer = None From e817fac5424f7f19a5f20071dc08ce4e483d0636 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:29:23 -0700 Subject: [PATCH 49/80] better defaults --- modules/training.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/training.py b/modules/training.py index 1949fa4e..7bcecb38 100644 --- a/modules/training.py +++ b/modules/training.py @@ -34,8 +34,8 @@ def create_train_interface(): learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') - lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. 
A good standard value is twice your Rank.') # TODO: Better explain what this does, in terms of real world effect especially. lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') @@ -52,7 +52,7 @@ def create_train_interface(): with gr.Row(): raw_text_file = gr.Dropdown(choices=get_dataset('training/datasets', 'txt'), value='None', label='Text File', info='The raw text file to use for training.') ui.create_refresh_button(raw_text_file, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'txt')}, 'refresh-button') - overlap_len = gr.Slider(label='Overlap Length', minimum=0,maximum=512, value=32, step=8, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length above)') + overlap_len = gr.Slider(label='Overlap Length', minimum=0,maximum=512, value=128, step=16, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length above). Setting overlap to exactly half the cutoff length may be ideal.') with gr.Row(): start_button = gr.Button("Start LoRA Training") From b0f05046b307ce484c8fe8300a10e1909d94904d Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:50:37 -0700 Subject: [PATCH 50/80] remove duplicate import --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8400250f..79da715d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,5 @@ rwkv==0.7.1 safetensors==0.3.0 sentencepiece tqdm -peft datasets git+https://github.com/huggingface/transformers From 8579fe51dd09651ce7168d2191f9d741540881a5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 12:59:34 -0300 Subject: [PATCH 51/80] Fix new lines in the HTML tab --- modules/html_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/html_generator.py b/modules/html_generator.py index ff18c913..48d2e02e 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -34,7 +34,7 @@ def convert_to_markdown(string): string = string.replace('\\begin{blockquote}', '> ') string = string.replace('\\end{blockquote}', '') string = re.sub(r"(.)```", r"\1\n```", string) -# string = fix_newlines(string) + string = fix_newlines(string) return markdown.markdown(string, extensions=['fenced_code']) def generate_basic_html(string): From 91aa5b460ed1f330e35b02fd7f5368912ea6526c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:08:38 -0300 Subject: [PATCH 52/80] If both .pt and .safetensors are present, download only safetensors --- download-model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/download-model.py b/download-model.py index 25386e5f..dce7e749 100644 --- a/download-model.py +++ b/download-model.py @@ -100,6 +100,7 @@ def get_download_links_from_huggingface(model, branch): links = [] classifications = [] has_pytorch = False + has_pt = False has_safetensors = False is_lora = False while 
True: @@ -115,7 +116,7 @@ def get_download_links_from_huggingface(model, branch): is_lora = True is_pytorch = re.match("(pytorch|adapter)_model.*\.bin", fname) - is_safetensors = re.match("model.*\.safetensors", fname) + is_safetensors = re.match(".*\.safetensors", fname) is_pt = re.match(".*\.pt", fname) is_tokenizer = re.match("tokenizer.*\.model", fname) is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer @@ -134,6 +135,7 @@ def get_download_links_from_huggingface(model, branch): has_pytorch = True classifications.append('pytorch') elif is_pt: + has_pt = True classifications.append('pt') cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50' @@ -141,9 +143,9 @@ def get_download_links_from_huggingface(model, branch): cursor = cursor.replace(b'=', b'%3D') # If both pytorch and safetensors are available, download safetensors only - if has_pytorch and has_safetensors: + if (has_pytorch or has_pt) and has_safetensors: for i in range(len(classifications)-1, -1, -1): - if classifications[i] == 'pytorch': + if classifications[i] in ['pytorch', 'pt']: links.pop(i) return links, is_lora From 88ad86249d59b2984a99c7366e89728e8a6cc19e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:19:52 -0300 Subject: [PATCH 53/80] Remove unnecessary file --- training/formats/put-trainer-formats-here.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 training/formats/put-trainer-formats-here.txt diff --git a/training/formats/put-trainer-formats-here.txt b/training/formats/put-trainer-formats-here.txt deleted file mode 100644 index e69de29b..00000000 From cac577d99f3ebf864ec8d4701211ec94cf32c4fa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:25:58 -0300 Subject: [PATCH 54/80] Fix interface reloading --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 4d9ee5e8..6023451b 100644 --- a/server.py +++ b/server.py @@ -494,7 +494,7 @@ def create_interface(): shared.gradio['reset_interface'] = gr.Button("Apply and restart the interface", type="primary") shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) - shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...\'; setTimeout(function(){location.reload()},2500)}') + shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'Reloading...
\'; setTimeout(function(){location.reload()},2500); return []}') if shared.args.extensions is not None: extensions_module.create_extensions_block() From c8207d474f9c5365ab5a1c269eb71bff05a31988 Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:38:55 +0300 Subject: [PATCH 55/80] Generalized load_quantized --- modules/GPTQ_loader.py | 54 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index afb5695f..351d658d 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -4,13 +4,48 @@ from pathlib import Path import accelerate import torch +import transformers +from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) -import llama import llama_inference_offload -import opt +from quant import make_quant +from modelutils import find_layers + +def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head']): + config = AutoConfig.from_pretrained(model) + def noop(*args, **kwargs): + pass + torch.nn.init.kaiming_uniform_ = noop + torch.nn.init.uniform_ = noop + torch.nn.init.normal_ = noop + + torch.set_default_dtype(torch.half) + transformers.modeling_utils._init_weights = False + torch.set_default_dtype(torch.half) + model = AutoModelForCausalLM.from_config(config) + torch.set_default_dtype(torch.float) + model = model.eval() + layers = find_layers(model) + for name in exclude_layers: + if name in layers: + del layers[name] + make_quant(model, layers, wbits, groupsize, faster=faster_kernel) + + del layers + + print('Loading model ...') + if checkpoint.endswith('.safetensors'): + from safetensors.torch import load_file as safe_load + model.load_state_dict(safe_load(checkpoint)) + else: + model.load_state_dict(torch.load(checkpoint)) + model.seqlen = 2048 + print('Done.') + + return model def load_quantized(model_name): @@ -20,6 +55,8 @@ def load_quantized(model_name): model_type = 'llama' elif model_name.lower().startswith(('opt', 'galactica')): model_type = 'opt' + elif model_name.lower().startswith(('gpt-j', 'pygmalion-6b')): + model_type = 'gptj' else: print("Can't determine model type from model name. Please specify it manually using --model_type " "argument") @@ -27,15 +64,12 @@ def load_quantized(model_name): else: model_type = shared.args.model_type.lower() - if model_type == 'llama': - if not shared.args.pre_layer: - load_quant = llama.load_quant - else: - load_quant = llama_inference_offload.load_quant - elif model_type == 'opt': - load_quant = opt.load_quant + if model_type == 'llama' and shared.args.pre_layer: + oad_quant = llama_inference_offload.load_quant + elif model_type in ('llama', 'opt', 'gptj'): + load_quant = _load_quant else: - print("Unknown pre-quantized model type specified. Only 'llama' and 'opt' are supported") + print("Unknown pre-quantized model type specified. Only 'llama', 'opt' and 'gptj' are supported") exit() # Now we are going to try to locate the quantized model file. 
From 1c075d8d219b5fd2bfeba1b4bad8f912b22a26da Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:43:50 +0300 Subject: [PATCH 56/80] Fix typo --- modules/GPTQ_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 351d658d..1fdd23c0 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -65,7 +65,7 @@ def load_quantized(model_name): model_type = shared.args.model_type.lower() if model_type == 'llama' and shared.args.pre_layer: - oad_quant = llama_inference_offload.load_quant + load_quant = llama_inference_offload.load_quant elif model_type in ('llama', 'opt', 'gptj'): load_quant = _load_quant else: From d1377c37af2bb29c97d06ec996b5a0a66010d005 Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:57:16 +0300 Subject: [PATCH 57/80] Fixes for api server - chat mode and integer temperature --- extensions/api/script.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/extensions/api/script.py b/extensions/api/script.py index bd7c1900..dd48f58f 100644 --- a/extensions/api/script.py +++ b/extensions/api/script.py @@ -43,14 +43,14 @@ class Handler(BaseHTTPRequestHandler): generator = generate_reply( question = prompt, - max_new_tokens = body.get('max_length', 200), + max_new_tokens = int(body.get('max_length', 200)), do_sample=True, - temperature=body.get('temperature', 0.5), - top_p=body.get('top_p', 1), - typical_p=body.get('typical', 1), - repetition_penalty=body.get('rep_pen', 1.1), + temperature=float(body.get('temperature', 0.5)), + top_p=float(body.get('top_p', 1)), + typical_p=float(body.get('typical', 1)), + repetition_penalty=float(body.get('rep_pen', 1.1)), encoder_repetition_penalty=1, - top_k=body.get('top_k', 0), + top_k=int(body.get('top_k', 0)), min_length=0, no_repeat_ngram_size=0, num_beams=1, @@ -62,7 +62,10 @@ class Handler(BaseHTTPRequestHandler): answer = '' for a in generator: - answer = a[0] + if isinstance(a, str): + answer = a + else: + answer = a[0] response = json.dumps({ 'results': [{ From 41ec682834de3e7b79cd8e27aeec98690bc209ac Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 22:45:38 +0300 Subject: [PATCH 58/80] Disable kernel threshold for gpt-j --- modules/GPTQ_loader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 1fdd23c0..2a9039a3 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -14,7 +14,7 @@ import llama_inference_offload from quant import make_quant from modelutils import find_layers -def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head']): +def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head'], kernel_switch_threshold=128): config = AutoConfig.from_pretrained(model) def noop(*args, **kwargs): pass @@ -32,7 +32,7 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc for name in exclude_layers: if name in layers: del layers[name] - make_quant(model, layers, wbits, groupsize, faster=faster_kernel) + make_quant(model, layers, wbits, groupsize, faster=faster_kernel, kernel_switch_threshold=kernel_switch_threshold) del layers @@ -109,7 +109,8 @@ def load_quantized(model_name): if shared.args.pre_layer: model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, shared.args.pre_layer) else: - model = load_quant(str(path_to_model), 
str(pt_path), shared.args.wbits, shared.args.groupsize) + threshold = False if model_type == 'gptj' else 128 + model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, kernel_switch_threshold=threshold) # accelerate offload (doesn't work properly) if shared.args.gpu_memory: From 0bec15ebcd1571155a54e87b371dc40534864f2e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:34:15 -0300 Subject: [PATCH 59/80] Reorder imports --- modules/GPTQ_loader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 2a9039a3..c99a63f3 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -5,14 +5,15 @@ from pathlib import Path import accelerate import torch import transformers -from transformers import AutoConfig, AutoModelForCausalLM +from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) import llama_inference_offload -from quant import make_quant from modelutils import find_layers +from quant import make_quant + def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head'], kernel_switch_threshold=128): config = AutoConfig.from_pretrained(model) From 010b259dde859b5703a6ea4cf2ea6c0aa4f25343 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:46:00 -0300 Subject: [PATCH 60/80] Update documentation --- README.md | 2 +- modules/GPTQ_loader.py | 1 - modules/shared.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f6b1d4f5..ba386852 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ Optionally, you can use the following command-line flags: | `--cpu` | Use the CPU to generate text.| | `--load-in-8bit` | Load the model with 8-bit precision.| | `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. | -| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported. | +| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. | | `--groupsize GROUPSIZE` | GPTQ: Group size. | | `--pre_layer PRE_LAYER` | GPTQ: The number of layers to preload. | | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index c99a63f3..7926d0ab 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -48,7 +48,6 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc return model - def load_quantized(model_name): if not shared.args.model_type: # Try to determine model type from model name diff --git a/modules/shared.py b/modules/shared.py index ac9d750c..5d1b42d4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -84,7 +84,7 @@ parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use -- parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.') parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.') parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 
2, 3, 4 and 8 are supported.') -parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported.') +parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported.') parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.') parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to preload.') From 4d8e10100686a680026f76e9854be90ef279a797 Mon Sep 17 00:00:00 2001 From: Nikita Skakun Date: Tue, 28 Mar 2023 14:24:23 -0700 Subject: [PATCH 61/80] Refactor download process to use multiprocessing The previous implementation used threads to download files in parallel, which could lead to performance issues due to the Global Interpreter Lock (GIL). This commit refactors the download process to use multiprocessing instead, which allows for true parallelism across multiple CPUs. This results in significantly faster downloads, particularly for large models. --- download-model.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/download-model.py b/download-model.py index dce7e749..48ae449e 100644 --- a/download-model.py +++ b/download-model.py @@ -17,13 +17,6 @@ from pathlib import Path import requests import tqdm -parser = argparse.ArgumentParser() -parser.add_argument('MODEL', type=str, default=None, nargs='?') -parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') -parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') -parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') -args = parser.parse_args() - def get_file(args): url = args[0] output_folder = args[1] @@ -150,7 +143,22 @@ def get_download_links_from_huggingface(model, branch): return links, is_lora +def download_files(file_list, output_folder, num_processes=8): + with multiprocessing.Pool(processes=num_processes) as pool: + args = [(url, output_folder, idx+1, len(file_list)) for idx, url in enumerate(file_list)] + for _ in tqdm.tqdm(pool.imap_unordered(get_file, args), total=len(args)): + pass + pool.close() + pool.join() + if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('MODEL', type=str, default=None, nargs='?') + parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') + parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') + parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') + args = parser.parse_args() + model = args.MODEL branch = args.branch if model is None: @@ -179,7 +187,4 @@ if __name__ == '__main__': # Downloading the files print(f"Downloading the model to {output_folder}") - pool = multiprocessing.Pool(processes=args.threads) - results = pool.map(get_file, [[links[i], output_folder, i+1, len(links)] for i in range(len(links))]) - pool.close() - pool.join() + download_files(links, output_folder, num_processes=args.threads) From 304f812c637f5494e6c42d296040f0506d9194a1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 19:20:50 -0300 Subject: [PATCH 62/80] Gracefully handle CUDA out of memory errors with streaming --- modules/callbacks.py | 5 +++++ 1 file changed, 5 insertions(+) diff 
--git a/modules/callbacks.py b/modules/callbacks.py index d85f406d..aa92f9cb 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -1,4 +1,5 @@ import gc +import traceback from queue import Queue from threading import Thread @@ -63,6 +64,10 @@ class Iteratorize: ret = self.mfunc(callback=_callback, **self.kwargs) except ValueError: pass + except: + traceback.print_exc() + pass + clear_torch_cache() self.q.put(self.sentinel) if self.c_callback: From ff515ec2fe693cee7ea1d86d5e3f5bf0397aca2f Mon Sep 17 00:00:00 2001 From: Nikita Skakun Date: Tue, 28 Mar 2023 18:29:20 -0700 Subject: [PATCH 63/80] Improve progress bar visual style This commit reverts the performance improvements of the previous commit in favor of an improved visual style for the multithreaded progress bars. The style of the progress bars has been modified so that they take up the same amount of space and stay aligned. --- download-model.py | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/download-model.py b/download-model.py index 48ae449e..2954f4b1 100644 --- a/download-model.py +++ b/download-model.py @@ -16,23 +16,17 @@ from pathlib import Path import requests import tqdm +from tqdm.contrib.concurrent import thread_map -def get_file(url, output_folder): r = requests.get(url, stream=True) - with open(output_folder / Path(url.split('/')[-1]), 'wb') as f: + with open(output_folder / Path(url.rsplit('/', 1)[1]), 'wb') as f: total_size = int(r.headers.get('content-length', 0)) block_size = 1024 - t = tqdm.tqdm(total=total_size, unit='iB', unit_scale=True) - for data in r.iter_content(block_size): - t.update(len(data)) - f.write(data) - t.close() + with tqdm.tqdm(total=total_size, unit='iB', unit_scale=True, bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}') as t: + for data in r.iter_content(block_size): + t.update(len(data)) + f.write(data) def sanitize_branch_name(branch_name): pattern = re.compile(r"^[a-zA-Z0-9._-]+$") @@ -143,13 +137,8 @@ def get_download_links_from_huggingface(model, branch): return links, is_lora -def download_files(file_list, output_folder, num_processes=8): - with multiprocessing.Pool(processes=num_processes) as pool: - args = [(url, output_folder, idx+1, len(file_list)) for idx, url in enumerate(file_list)] - for _ in tqdm.tqdm(pool.imap_unordered(get_file, args), total=len(args)): - pass - pool.close() - pool.join() +def download_files(file_list, output_folder, num_threads=8): + thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads, verbose=False) if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -187,4 +176,4 @@ if __name__ == '__main__': # Downloading the files print(f"Downloading the model to {output_folder}") - download_files(links, output_folder, num_processes=args.threads) + download_files(links, output_folder, args.threads) From aaa218a10216483b48cec068d73a1f891efb55ec Mon Sep 17 00:00:00 2001 From: Nikita Skakun Date: Tue, 28 Mar 2023 18:32:49 -0700 Subject: [PATCH 64/80] Remove unused import. 
--- download-model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/download-model.py b/download-model.py index 2954f4b1..a2d3a6d6 100644 --- a/download-model.py +++ b/download-model.py @@ -9,7 +9,6 @@ python download-model.py facebook/opt-1.3b import argparse import base64 import json -import multiprocessing import re import sys from pathlib import Path From 1edfb9677840b03ce321a450aed87961af24a361 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:27:02 -0300 Subject: [PATCH 65/80] Fix loading extensions from within the interface --- modules/extensions.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/extensions.py b/modules/extensions.py index c3cf4de4..fe6a3945 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -7,7 +7,7 @@ import modules.shared as shared state = {} available_extensions = [] -setup_called = False +setup_called = set() def load_extensions(): global state @@ -53,13 +53,12 @@ def create_extensions_block(): should_display_ui = False # Running setup function - if not setup_called: - for extension, name in iterator(): - if hasattr(extension, "setup"): - extension.setup() - if hasattr(extension, "ui"): - should_display_ui = True - setup_called = True + for extension, name in iterator(): + if hasattr(extension, "ui"): + should_display_ui = True + if extension not in setup_called and hasattr(extension, "setup"): + setup_called.add(extension) + extension.setup() # Creating the extension ui elements if should_display_ui: From c2a863f87deee8b9a314e3c58d93b6b2703cf0d9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:11:51 -0300 Subject: [PATCH 66/80] Mention the updated one-click installer --- README.md | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index ba386852..241d0e03 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,28 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Installation -The recommended installation methods are the following: +### One-click installers + +[oobabooga-windows.zip](https://github.com/oobabooga/text-generation-webui/releases/download/installers/oobabooga-windows.zip) + +Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. + +* To download a model, double click on "download-model" +* To start the web UI, double click on "start-webui" + +Source codes: https://github.com/oobabooga/one-click-installers + +> **Note** +> +> Thanks to [@jllllll](https://github.com/jllllll) and [@ClayShoaf](https://github.com/ClayShoaf), the Windows 1-click installer now sets up 8-bit and 4-bit requirements out of the box. No additional installation steps are necessary. + +> **Note** +> +> There is no need to run the installer as admin. + +### Manual installation using Conda + +These are the recommended installation methods: * Linux and MacOS: using conda natively. * Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). 
@@ -84,24 +105,8 @@ pip install -r requirements.txt > > For bitsandbytes and `--load-in-8bit` to work on Linux/WSL, this dirty fix is currently necessary: https://github.com/oobabooga/text-generation-webui/issues/400#issuecomment-1474876859 -### Alternative: one-click installers -[oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) - -[oobabooga-linux.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-linux.zip) - -Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. - -* To download a model, double click on "download-model" -* To start the web UI, double click on "start-webui" - -Source codes: https://github.com/oobabooga/one-click-installers - -> **Note** -> -> To get 8-bit and 4-bit models working in your 1-click Windows installation, you can use the [one-click-bandaid](https://github.com/ClayShoaf/oobabooga-one-click-bandaid). - -### Alternative: native Windows installation +### Alternative: manual Windows installation As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). From 5d0b83c341804bcdffe73d8876468012a2edc78b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:22:19 -0300 Subject: [PATCH 67/80] Update README.md --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 241d0e03..965c9d15 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,9 @@ Source codes: https://github.com/oobabooga/one-click-installers ### Manual installation using Conda -These are the recommended installation methods: +Recommended if you have some experience with the command-line. -* Linux and MacOS: using conda natively. -* Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). +On Windows, I additionally recommend carrying out the installation on WSL instead of the base system: [WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html From 3b4447a4fe2ef7c99322a626b750ea1aa43083e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:24:11 -0300 Subject: [PATCH 68/80] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 965c9d15..87367877 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ Recommended if you have some experience with the command-line. On Windows, I additionally recommend carrying out the installation on WSL instead of the base system: [WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). +#### 0. 
Install Conda + Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html On Linux or WSL, it can be automatically installed with these two commands: From 41b58bc47e84458b880386e57d0d17e2bfe6f76c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:02:29 -0300 Subject: [PATCH 69/80] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87367877..97f26ccb 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ pip install -r requirements.txt ### Alternative: manual Windows installation -As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). +As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Windows installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-installation-guide). ### Alternative: Docker From a6d03730639463eb261b40ec5dad380f5df791ed Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:48:17 -0300 Subject: [PATCH 70/80] Fix training dataset loading #636 --- modules/training.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/training.py b/modules/training.py index 7bcecb38..913866d9 100644 --- a/modules/training.py +++ b/modules/training.py @@ -119,7 +119,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int } # == Prep the dataset, format, etc == - if raw_text_file is not None: + if raw_text_file not in ['None', '']: print("Loading raw text file dataset...") with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file: raw_text = file.read() @@ -136,16 +136,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int del text_chunks else: - with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: - format_data: dict[str, str] = json.load(formatFile) - - if dataset is None: + if dataset in ['None', '']: yield "**Missing dataset choice input, cannot continue.**" return - if format is None: + + if format in ['None', '']: yield "**Missing format choice input, cannot continue.**" return + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: + format_data: dict[str, str] = json.load(formatFile) + def generate_prompt(data_point: dict[str, str]): for options, data in format_data.items(): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): From 58349f44a0924671e65de7cb42764fb846653afe Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:55:34 -0300 Subject: [PATCH 71/80] Handle training exception for unsupported models --- modules/training.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 913866d9..62ba181c 100644 --- a/modules/training.py +++ b/modules/training.py @@ -2,6 +2,7 @@ import json import sys import threading import time +import traceback from pathlib import Path import gradio as gr @@ -184,7 +185,13 @@ def 
do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int bias="none", task_type="CAUSAL_LM" ) - lora_model = get_peft_model(shared.model, config) + + try: + lora_model = get_peft_model(shared.model, config) + except: + yield traceback.format_exc() + return + trainer = transformers.Trainer( model=lora_model, train_dataset=train_data, From 1445ea86f7c2a0c8e3f88337ab15d4e076accc70 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 20:26:44 -0300 Subject: [PATCH 72/80] Add --output and better metadata for downloading models --- download-model.py | 21 +++++++++++++++++---- loras/place-your-loras-here.txt | 0 2 files changed, 17 insertions(+), 4 deletions(-) delete mode 100644 loras/place-your-loras-here.txt diff --git a/download-model.py b/download-model.py index dce7e749..05d9dca4 100644 --- a/download-model.py +++ b/download-model.py @@ -8,6 +8,7 @@ python download-model.py facebook/opt-1.3b import argparse import base64 +import datetime import json import multiprocessing import re @@ -22,6 +23,7 @@ parser.add_argument('MODEL', type=str, default=None, nargs='?') parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') +parser.add_argument('--output', type=str, default=None, help='The folder where the model should be saved.') args = parser.parse_args() def get_file(args): @@ -169,13 +171,24 @@ if __name__ == '__main__': sys.exit() links, is_lora = get_download_links_from_huggingface(model, branch) - base_folder = 'models' if not is_lora else 'loras' - if branch != 'main': - output_folder = Path(base_folder) / (model.split('/')[-1] + f'_{branch}') + + if args.output is not None: + base_folder = args.output else: - output_folder = Path(base_folder) / model.split('/')[-1] + base_folder = 'models' if not is_lora else 'loras' + + output_folder = f"{'_'.join(model.split('/')[-2:])}" + if branch != 'main': + output_folder += f'_{branch}' + + # Creating the folder and writing the metadata + output_folder = Path(base_folder) / output_folder if not output_folder.exists(): output_folder.mkdir() + with open(output_folder / 'huggingface-metadata.txt', 'w') as f: + f.write(f'url: https://huggingface.co/{model}\n') + f.write(f'branch: {branch}\n') + f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n') # Downloading the files print(f"Downloading the model to {output_folder}") diff --git a/loras/place-your-loras-here.txt b/loras/place-your-loras-here.txt deleted file mode 100644 index e69de29b..00000000 From 37754164eb44338e9f9bf7642a49cc6f0a9802b9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 20:47:36 -0300 Subject: [PATCH 73/80] Move argparse --- download-model.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/download-model.py b/download-model.py index f67055ba..dc6f3a9d 100644 --- a/download-model.py +++ b/download-model.py @@ -149,13 +149,6 @@ def download_files(file_list, output_folder, num_threads=8): thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads, verbose=False) if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('MODEL', type=str, default=None, nargs='?') - parser.add_argument('--branch', 
type=str, default='main', help='Name of the Git branch to download from.') - parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') - parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') - args = parser.parse_args() - model = args.MODEL branch = args.branch if model is None: From 0345e042492d25d907b592bdce6cee47eebd2d0c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 21:17:48 -0300 Subject: [PATCH 74/80] Fix "Unknown argument(s): {'verbose': False}" --- download-model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/download-model.py b/download-model.py index dc6f3a9d..6f0751d8 100644 --- a/download-model.py +++ b/download-model.py @@ -146,7 +146,7 @@ def get_download_links_from_huggingface(model, branch): return links, is_lora def download_files(file_list, output_folder, num_threads=8): - thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads, verbose=False) + thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads) if __name__ == '__main__': model = args.MODEL @@ -189,3 +189,4 @@ if __name__ == '__main__': # Downloading the files print(f"Downloading the model to {output_folder}") download_files(links, output_folder, args.threads) + print() From 1cb9246160bafca2599b20b69e7c4e9afff410e6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 21:47:36 -0300 Subject: [PATCH 75/80] Adapt to the new model names --- modules/GPTQ_loader.py | 7 ++++--- modules/models.py | 4 ++-- modules/shared.py | 4 ---- modules/text_generation.py | 6 +++--- server.py | 13 ++++++------- settings-template.json | 9 +++------ 6 files changed, 18 insertions(+), 25 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 7926d0ab..e7877de7 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -51,11 +51,12 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc def load_quantized(model_name): if not shared.args.model_type: # Try to determine model type from model name - if model_name.lower().startswith(('llama', 'alpaca')): + name = model_name.lower() + if any((k in name for k in ['llama', 'alpaca'])): model_type = 'llama' - elif model_name.lower().startswith(('opt', 'galactica')): + elif any((k in name for k in ['opt-', 'galactica'])): model_type = 'opt' - elif model_name.lower().startswith(('gpt-j', 'pygmalion-6b')): + elif any((k in name for k in ['gpt-j', 'pygmalion-6b'])): model_type = 'gptj' else: print("Can't determine model type from model name. 
Please specify it manually using --model_type " diff --git a/modules/models.py b/modules/models.py index a6839318..b19507db 100644 --- a/modules/models.py +++ b/modules/models.py @@ -41,7 +41,7 @@ def load_model(model_name): print(f"Loading {model_name}...") t0 = time.time() - shared.is_RWKV = model_name.lower().startswith('rwkv-') + shared.is_RWKV = 'rwkv-' in model_name.lower() # Default settings if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.wbits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): @@ -159,7 +159,7 @@ def load_model(model_name): model = AutoModelForCausalLM.from_pretrained(checkpoint, **params) # Loading the tokenizer - if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): + if any((k in shared.model_name.lower() for k in ['gpt4chan', 'gpt-4chan'])) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/")) else: tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}/")) diff --git a/modules/shared.py b/modules/shared.py index 5d1b42d4..8bbf3b69 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -37,10 +37,6 @@ settings = { 'chat_generation_attempts': 1, 'chat_generation_attempts_min': 1, 'chat_generation_attempts_max': 5, - 'name1_pygmalion': 'You', - 'name2_pygmalion': 'Kawaii', - 'context_pygmalion': "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - 'stop_at_newline_pygmalion': False, 'default_extensions': [], 'chat_default_extensions': ["gallery"], 'presets': { diff --git a/modules/text_generation.py b/modules/text_generation.py index 20a07ca3..7b5fcd6a 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -42,7 +42,7 @@ def encode(prompt, tokens_to_generate=0, add_special_tokens=True): def decode(output_ids): # Open Assistant relies on special tokens like <|endoftext|> - if re.match('(oasst|galactica)-*', shared.model_name.lower()): + if re.match('.*(oasst|galactica)-*', shared.model_name.lower()): return shared.tokenizer.decode(output_ids, skip_special_tokens=False) else: reply = shared.tokenizer.decode(output_ids, skip_special_tokens=True) @@ -77,10 +77,10 @@ def fix_galactica(s): def formatted_outputs(reply, model_name): if not (shared.args.chat or shared.args.cai_chat): - if model_name.lower().startswith('galactica'): + if 'galactica' in model_name.lower(): reply = fix_galactica(reply) return reply, reply, generate_basic_html(reply) - elif model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')): + elif any((k in shared.model_name.lower() for k in ['gpt4chan', 'gpt-4chan'])): reply = fix_gpt4chan(reply) return reply, 'Only applicable for GALACTICA models.', generate_4chan_html(reply) else: diff --git a/server.py b/server.py index 6023451b..62a7ebfb 100644 --- a/server.py +++ b/server.py @@ -282,7 +282,6 @@ else: default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' -suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' def 
create_interface(): @@ -294,7 +293,7 @@ def create_interface(): if shared.args.chat or shared.args.cai_chat: with gr.Tab("Text generation", elem_id="main"): if shared.args.cai_chat: - shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) + shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], shared.character)) else: shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) shared.gradio['textbox'] = gr.Textbox(label='Input') @@ -314,9 +313,9 @@ def create_interface(): shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) with gr.Tab("Character", elem_id="chat-settings"): - shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') - shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') - shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') + shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') + shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Bot\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=5, label='Context') with gr.Row(): shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu') ui.create_refresh_button(shared.gradio['character_menu'], lambda : None, lambda : {'choices': get_available_characters()}, 'refresh-button') @@ -354,7 +353,7 @@ def create_interface(): shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) with gr.Column(): shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') - shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') + shared.gradio['check'] = gr.Checkbox(value=shared.settings['stop_at_newline'], label='Stop generating at new line character?') create_settings_menus(default_preset) @@ -401,7 +400,7 @@ def create_interface(): shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js+ui.chat_js}}}") - shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) + shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings['name1'], shared.settings['name2']), None, None) shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) elif shared.args.notebook: diff --git a/settings-template.json b/settings-template.json index 79fd5023..2a2aaed9 100644 --- a/settings-template.json +++ b/settings-template.json @@ -12,10 +12,6 @@ "chat_generation_attempts": 1, "chat_generation_attempts_min": 1, 
"chat_generation_attempts_max": 5, - "name1_pygmalion": "You", - "name2_pygmalion": "Kawaii", - "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - "stop_at_newline_pygmalion": false, "default_extensions": [], "chat_default_extensions": [ "gallery" @@ -29,10 +25,11 @@ "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>", + "alpaca-*": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" }, "lora_prompts": { "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + "(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" } } From 55755e27b9ddb98e48d5b2dba08c3fb728d7d680 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:40:04 -0300 Subject: [PATCH 76/80] Don't hardcode prompts in the settings dict/json --- modules/shared.py | 17 ++++++++--------- prompts/GPT-4chan.txt | 6 ++++++ server.py | 16 ++++++++-------- settings-template.json | 17 ++++++++--------- 4 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 prompts/GPT-4chan.txt diff --git a/modules/shared.py b/modules/shared.py index 8bbf3b69..348defa3 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -41,19 +41,18 @@ settings = { 'chat_default_extensions': ["gallery"], 'presets': { 'default': 'NovelAI-Sphinx Moth', - 'pygmalion-*': 'Pygmalion', - 'RWKV-*': 'Naive', + '.*pygmalion': 'Pygmalion', + '.*RWKV': 'Naive', }, 'prompts': { - 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n', - '(rosey|chip|joi)_.*_instruct.*': 'User: \n', - 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>', - 'alpaca-*': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n", + 'default': 'QA', + '.*(gpt4chan|gpt-4chan|4chan)': 'GPT-4chan', + '.*oasst': 'Open Assistant', + '.*alpaca': "Alpaca", }, 'lora_prompts': { - 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - '(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + 'default': 'QA', + '.*(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Alpaca", } } diff --git a/prompts/GPT-4chan.txt b/prompts/GPT-4chan.txt new file mode 100644 index 00000000..1bc8c7f4 --- /dev/null +++ b/prompts/GPT-4chan.txt @@ -0,0 +1,6 @@ +----- +--- 865467536 +Hello, AI frens! +How are you doing on this fine day? +--- 865467537 + diff --git a/server.py b/server.py index 62a7ebfb..50af759e 100644 --- a/server.py +++ b/server.py @@ -73,9 +73,7 @@ def load_model_wrapper(selected_model): def load_lora_wrapper(selected_lora): add_lora_to_model(selected_lora) - default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] - - return selected_lora, default_text + return selected_lora def load_preset_values(preset_menu, return_dict=False): generate_params = { @@ -141,7 +139,10 @@ def load_prompt(fname): return '' else: with open(Path(f'prompts/{fname}.txt'), 'r', encoding='utf-8') as f: - return f.read() + text = f.read() + if text[-1] == '\n': + text = text[:-1] + return text def create_prompt_menus(): with gr.Row(): @@ -212,7 +213,7 @@ def create_settings_menus(default_preset): shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) - shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) + shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -277,11 +278,10 @@ if shared.args.lora: # Default UI settings default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] if shared.lora_name != "None": - default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] + default_text = load_prompt(shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')]) else: - default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] + default_text = load_prompt(shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')]) title ='Text generation web UI' -description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' def create_interface(): diff --git a/settings-template.json 
b/settings-template.json index 2a2aaed9..da767cda 100644 --- a/settings-template.json +++ b/settings-template.json @@ -18,18 +18,17 @@ ], "presets": { "default": "NovelAI-Sphinx Moth", - "pygmalion-*": "Pygmalion", - "RWKV-*": "Naive" + ".*pygmalion": "Pygmalion", + ".*RWKV": "Naive" }, "prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", - "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>", - "alpaca-*": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + "default": "QA", + ".*(gpt4chan|gpt-4chan|4chan)": "GPT-4chan", + ".*oasst": "Open Assistant", + ".*alpaca": "Alpaca" }, "lora_prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + "default": "QA", + ".*(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)": "Alpaca" } } From a21e5807821a8aee7ecddfdbcc9f8e3bde8c83a3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:50:58 -0300 Subject: [PATCH 77/80] Move an import --- modules/LoRA.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 1d36c333..8c30e609 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -1,6 +1,7 @@ from pathlib import Path import torch +from peft import PeftModel import modules.shared as shared from modules.models import load_model @@ -14,8 +15,6 @@ def reload_model(): def add_lora_to_model(lora_name): - from peft import PeftModel - # If a LoRA had been previously loaded, or if we want # to unload a LoRA, reload the model if shared.lora_name not in ['None', ''] or lora_name in ['None', '']: From 131753fcf5e3c3b22a8e2e0ac67fb44c3e1dfd4e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 23:28:16 -0300 Subject: [PATCH 78/80] Save the sha256sum of downloaded models --- download-model.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/download-model.py b/download-model.py index 6f0751d8..7e5f61b2 100644 --- a/download-model.py +++ b/download-model.py @@ -93,6 +93,7 @@ def get_download_links_from_huggingface(model, branch): cursor = b"" links = [] + sha256 = [] classifications = [] has_pytorch = False has_pt = False @@ -117,6 +118,8 @@ def get_download_links_from_huggingface(model, branch): is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer if any((is_pytorch, is_safetensors, is_pt, is_tokenizer, is_text)): + if 'lfs' in dict[i]: + sha256.append([fname, dict[i]['lfs']['oid']]) if is_text: links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}") classifications.append('text') @@ -143,7 +146,7 @@ def get_download_links_from_huggingface(model, branch): if classifications[i] in ['pytorch', 'pt']: links.pop(i) - return links, is_lora + return links, sha256, is_lora def 
download_files(file_list, output_folder, num_threads=8): thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads) @@ -166,7 +169,7 @@ if __name__ == '__main__': print(f"Error: {err_branch}") sys.exit() - links, is_lora = get_download_links_from_huggingface(model, branch) + links, sha256, is_lora = get_download_links_from_huggingface(model, branch) if args.output is not None: base_folder = args.output @@ -185,6 +188,11 @@ if __name__ == '__main__': f.write(f'url: https://huggingface.co/{model}\n') f.write(f'branch: {branch}\n') f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n') + sha256_str = '' + for i in range(len(sha256)): + sha256_str += f' {sha256[i][1]} {sha256[i][0]}\n' + if sha256_str != '': + f.write(f'sha256sum:\n{sha256_str}') # Downloading the files print(f"Downloading the model to {output_folder}") From bd65940a48c8cb25dfdda8f64aa3bfc3bdf1b10b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 30 Mar 2023 00:43:49 -0300 Subject: [PATCH 79/80] Increase --chat box height --- css/chat.css | 3 +++ server.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/css/chat.css b/css/chat.css index 1e703530..dee76beb 100644 --- a/css/chat.css +++ b/css/chat.css @@ -29,3 +29,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding: 0; } +#gradio-chatbot { + height: 66.67vh; +} diff --git a/server.py b/server.py index 50af759e..27223f84 100644 --- a/server.py +++ b/server.py @@ -295,7 +295,7 @@ def create_interface(): if shared.args.cai_chat: shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], shared.character)) else: - shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) + shared.gradio['display'] = gr.Chatbot(value=shared.history['visible'], elem_id="gradio-chatbot") shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): shared.gradio['Generate'] = gr.Button('Generate') From f0fdab08d31d4c00e2e15c5871413ce847ca842b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 30 Mar 2023 01:02:11 -0300 Subject: [PATCH 80/80] Increase --chat height --- css/chat.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/chat.css b/css/chat.css index dee76beb..c8a9d70a 100644 --- a/css/chat.css +++ b/css/chat.css @@ -32,3 +32,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #gradio-chatbot { height: 66.67vh; } + +.wrap.svelte-6roggh.svelte-6roggh { + max-height: 92.5%; +}
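The model-name matching introduced in patches 75 and 76 above relies on re.match, which only anchors at the beginning of the string; that is why keys such as oasst-* and pygmalion-* gain a leading .* (becoming .*oasst, .*pygmalion) once the old prefix checks are replaced by substring-style matching. Below is a minimal, self-contained sketch of that lookup; the dictionary mirrors the shape of shared.settings['prompts'], but its entries are only illustrative, not the project's actual defaults.

import re

# Regex-keyed lookup in the style of patches 75/76 (illustrative entries only).
prompts = {
    'default': 'QA',
    '.*(gpt4chan|gpt-4chan|4chan)': 'GPT-4chan',
    '.*oasst': 'Open Assistant',
    '.*alpaca': 'Alpaca',
}

def resolve_prompt(model_name):
    # re.match() anchors at the start of the string, so a key needs a
    # leading '.*' to match a substring anywhere in the model name.
    key = next((k for k in prompts if re.match(k.lower(), model_name.lower())), 'default')
    return prompts[key]

print(resolve_prompt('llama-30b'))           # QA (nothing matches, falls back to 'default')
print(resolve_prompt('gpt4-x-alpaca-13b'))   # Alpaca
print(resolve_prompt('oasst-sft-1-pythia'))  # Open Assistant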