From 1917b1527503d7efbce3d33aa7df9a216aaf36fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=A6=CF=86?= <42910943+Brawlence@users.noreply.github.com> Date: Tue, 21 Mar 2023 13:15:42 +0300 Subject: [PATCH 01/80] Unload and reload models on request --- server.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/server.py b/server.py index cdf7aa93..1309c17d 100644 --- a/server.py +++ b/server.py @@ -63,6 +63,18 @@ def load_model_wrapper(selected_model): return selected_model +def reload_model(): + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + shared.model, shared.tokenizer = load_model(shared.model_name) + +def unload_model(): + shared.model = shared.tokenizer = None + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + def load_lora_wrapper(selected_lora): shared.lora_name = selected_lora default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] @@ -126,6 +138,9 @@ def create_model_and_preset_menus(): with gr.Row(): shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') + with gr.Row(): + shared.gradio['unload_model'] = gr.Button(value='Unload model to free VRAM', elem_id="unload_model") + shared.gradio['reload_model'] = gr.Button(value='Reload the model into VRAM', elem_id="reload_model") def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) @@ -180,6 +195,8 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) + shared.gradio['unload_model'].click(fn=unload_model,inputs=[],outputs=[]) + shared.gradio['reload_model'].click(fn=reload_model,inputs=[],outputs=[]) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) From 483d173d23309f77d197951ad9f21632955fd13a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=A6=CF=86?= <42910943+Brawlence@users.noreply.github.com> Date: Tue, 21 Mar 2023 20:19:38 +0300 Subject: [PATCH 02/80] Code reuse + indication Now shows the message in the console when unloading weights. Also reload_model() calls unload_model() first to free the memory so that multiple reloads won't overfill it. 
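For reference, the pattern these two patches converge on is: drop the Python references to the model and tokenizer, run the garbage collector, and empty the CUDA allocator cache so the VRAM is actually returned; reloading is then just unload followed by load_model(). Below is a minimal self-contained sketch of that flow. The Shared container, its Args, and the dummy load_model() are illustrative stand-ins, not the webui's actual modules.shared and modules.models.load_model.

import gc

import torch


class Shared:
    """Stand-in for modules.shared: holds the model, tokenizer and CLI args."""
    class Args:
        cpu = False  # set True to skip the CUDA cache flush

    args = Args()
    model = None
    tokenizer = None
    model_name = 'example-13b'


shared = Shared()


def load_model(model_name):
    """Dummy loader standing in for modules.models.load_model()."""
    return object(), object()  # (model, tokenizer) placeholders


def unload_model():
    # Drop the references first so the weights become garbage-collectable,
    # then flush the CUDA cache so the memory is released back to the GPU.
    shared.model = shared.tokenizer = None
    if not shared.args.cpu:
        gc.collect()
        torch.cuda.empty_cache()
    print("Model weights unloaded.")


def reload_model():
    # Unloading first keeps repeated reloads from stacking copies in VRAM.
    unload_model()
    shared.model, shared.tokenizer = load_model(shared.model_name)


if __name__ == '__main__':
    reload_model()
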
--- server.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index 1309c17d..4c3497c9 100644 --- a/server.py +++ b/server.py @@ -64,9 +64,7 @@ def load_model_wrapper(selected_model): return selected_model def reload_model(): - if not shared.args.cpu: - gc.collect() - torch.cuda.empty_cache() + unload_model() shared.model, shared.tokenizer = load_model(shared.model_name) def unload_model(): @@ -74,6 +72,7 @@ def unload_model(): if not shared.args.cpu: gc.collect() torch.cuda.empty_cache() + print("Model weights unloaded.") def load_lora_wrapper(selected_lora): shared.lora_name = selected_lora From b37c54edcfee36ef5fdbaae9f6337d236be52b99 Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:30:18 -0700 Subject: [PATCH 03/80] lora-dir, model-dir and login auth Added lora-dir, model-dir, and a login auth arguments that points to a file contains usernames and passwords in the format of "u:pw,u:pw,..." --- modules/LoRA.py | 2 +- modules/models.py | 20 ++++++++++---------- modules/shared.py | 3 +++ server.py | 14 +++++++++----- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index aa68ad32..394f7367 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -30,7 +30,7 @@ def add_lora_to_model(lora_name): elif shared.args.load_in_8bit: params['device_map'] = {'': 0} - shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}"), **params) + shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_name}"), **params) if not shared.args.load_in_8bit and not shared.args.cpu: shared.model.half() if not hasattr(shared.model, "hf_device_map"): diff --git a/modules/models.py b/modules/models.py index ccb97da3..757eb8b9 100644 --- a/modules/models.py +++ b/modules/models.py @@ -46,9 +46,9 @@ def load_model(model_name): # Default settings if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) + model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), device_map='auto', load_in_8bit=True) else: - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) + model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) if torch.has_mps: device = torch.device('mps') model = model.to(device) @@ -76,11 +76,11 @@ def load_model(model_name): num_bits=4, group_size=64, group_dim=2, symmetric=False)) - model = OptLM(f"facebook/{shared.model_name}", env, "models", policy) + model = OptLM(f"facebook/{shared.model_name}", env, shared.model_name, policy) # DeepSpeed ZeRO-3 elif shared.args.deepspeed: - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) + model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), torch_dtype=torch.bfloat16 if 
shared.args.bf16 else torch.float16) model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0] model.module.eval() # Inference print(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}") @@ -89,8 +89,8 @@ def load_model(model_name): elif shared.is_RWKV: from modules.RWKV import RWKVModel, RWKVTokenizer - model = RWKVModel.from_pretrained(Path(f'models/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") - tokenizer = RWKVTokenizer.from_pretrained(Path('models')) + model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") + tokenizer = RWKVTokenizer.from_pretrained(Path(shared.model_name)) return model, tokenizer @@ -142,7 +142,7 @@ def load_model(model_name): if shared.args.disk: params["offload_folder"] = shared.args.disk_cache_dir - checkpoint = Path(f'models/{shared.model_name}') + checkpoint = Path(f'{shared.args.model_dir}/{shared.model_name}') if shared.args.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto': config = AutoConfig.from_pretrained(checkpoint) @@ -159,10 +159,10 @@ def load_model(model_name): model = AutoModelForCausalLM.from_pretrained(checkpoint, **params) # Loading the tokenizer - if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path("models/gpt-j-6B/").exists(): - tokenizer = AutoTokenizer.from_pretrained(Path("models/gpt-j-6B/")) + if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): + tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/")) else: - tokenizer = AutoTokenizer.from_pretrained(Path(f"models/{shared.model_name}/")) + tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}/")) tokenizer.truncation_side = 'left' print(f"Loaded the model in {(time.time()-t0):.2f} seconds.") diff --git a/modules/shared.py b/modules/shared.py index 720c697e..72cea1d4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -106,6 +106,9 @@ parser.add_argument('--listen-port', type=int, help='The listening port that the parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') +parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. 
"/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") +parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") args = parser.parse_args() # Provisional, this will be deleted later diff --git a/server.py b/server.py index f423e368..f8fd663c 100644 --- a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path('models/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) @@ -48,7 +48,7 @@ def get_available_softprompts(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) def get_available_loras(): - return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return ['None'] + sorted([item.name for item in list(Path('shared.args.lora_dir').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def load_model_wrapper(selected_model): if selected_model != shared.model_name: @@ -448,11 +448,15 @@ def create_interface(): extensions_module.create_extensions_block() # Launch the interface + gradio_auth_creds = [] + with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: + for line in file.readlines(): + gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] shared.gradio['interface'].queue() if shared.args.listen: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) else: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) create_interface() From ec2a1faceecddf1400245a6c8983e40ef430cccf Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:34:33 -0700 Subject: [PATCH 04/80] Update server.py --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index f8fd663c..c69abb4b 100644 --- 
a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_name}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From 9e2963e0c86180fec5a88db4ec77530ad2de7d69 Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:35:45 -0700 Subject: [PATCH 05/80] Update server.py --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index c69abb4b..67c1e915 100644 --- a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From d51cb8292b42eb29e4e45ed850d23b446208a0d3 Mon Sep 17 00:00:00 2001 From: catalpaaa Date: Fri, 24 Mar 2023 17:36:31 -0700 Subject: [PATCH 06/80] Update server.py yea i should go to bed --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index 67c1e915..8ac6031a 100644 --- a/server.py +++ b/server.py @@ -31,9 +31,9 @@ if settings_file is not None: def get_available_models(): if shared.args.flexgen: - return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) + return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.arg.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From 1a1e420e65e5e9aed26419ccccc59765505b38c6 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?=CE=A6=CF=86?= <42910943+Brawlence@users.noreply.github.com> Date: Sat, 25 Mar 2023 21:31:13 +0300 Subject: [PATCH 07/80] Silero_tts streaming fix Temporarily suppress the streaming during the audio response as it would interfere with the audio (making it stutter and play anew) --- extensions/silero_tts/script.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index a81a5da1..1352993a 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -26,6 +26,7 @@ current_params = params.copy() voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast'] +streaming_state = shared.args.no_stream # remember if chat streaming was enabled # Used for making text xml compatible, needed for voice pitch and speed control table = str.maketrans({ @@ -77,6 +78,7 @@ def input_modifier(string): shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')] shared.processing_message = "*Is recording a voice message...*" + shared.args.no_stream = True # Disable streaming cause otherwise the audio output will stutter and begin anew every time the message is being updated return string def output_modifier(string): @@ -84,7 +86,7 @@ def output_modifier(string): This function is applied to the model outputs. 
""" - global model, current_params + global model, current_params, streaming_state for i in params: if params[i] != current_params[i]: @@ -116,6 +118,7 @@ def output_modifier(string): string += f'\n\n{original_string}' shared.processing_message = "*Is typing...*" + shared.args.no_stream = streaming_state # restore the streaming option to the previous value return string def bot_prefix_modifier(string): From 566898a79a0915879273f3d77017908bcf7d62ab Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:08:26 -0700 Subject: [PATCH 08/80] initial lora training tab --- modules/training.py | 139 ++++++++++++++++++ requirements.txt | 2 + server.py | 7 +- .../datasets/put-trainer-datasets-here.txt | 0 training/formats/alpaca-chatbot-format.json | 4 + training/formats/alpaca-format.json | 4 + training/formats/put-trainer-formats-here.txt | 0 7 files changed, 153 insertions(+), 3 deletions(-) create mode 100644 modules/training.py create mode 100644 training/datasets/put-trainer-datasets-here.txt create mode 100644 training/formats/alpaca-chatbot-format.json create mode 100644 training/formats/alpaca-format.json create mode 100644 training/formats/put-trainer-formats-here.txt diff --git a/modules/training.py b/modules/training.py new file mode 100644 index 00000000..96cd6e7c --- /dev/null +++ b/modules/training.py @@ -0,0 +1,139 @@ +import sys, torch, json +from pathlib import Path +import gradio as gr +from datasets import load_dataset +import transformers +from modules import ui, shared +from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict + +def get_json_dataset(path: str): + def get_set(): + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) + return get_set + +def create_train_interface(): + with gr.Tab('Train LoRA', elem_id='lora-train-tab'): + loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") + # TODO: Add explanations of batch sizes and recommendations. Note that batch/microBatch determines gradient accumulation and explain what that means. Note the effects on VRAM usage from changing these values. + microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='(TODO)') + batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='(TODO)') + epochs = gr.Slider(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. + loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. 
This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + # TODO: Better explain what this does. + loraDropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') + cutoffLen = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + with gr.Row(): + datasetFunction = get_json_dataset('training/datasets') + dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') + with gr.Row(): + evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset') + ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') + with gr.Row(): + formatsFunction = get_json_dataset('training/formats') + format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format') + ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') + startButton = gr.Button("Start LoRA Training") + output = gr.Markdown(value="(...)") + startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) + +def cleanPath(basePath: str, path: str): + """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" + # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. 
+ # Or swap it to a strict whitelist of [a-zA-Z_0-9] + path = path.replace('\\', '/').replace('..', '_') + if basePath is None: + return path + return f'{Path(basePath).absolute()}/{path}' + +def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): + # Input validation / processing + # TODO: --lora-dir PR once pulled will need to be applied here + loraName = f"loras/{cleanPath(None, loraName)}" + if dataset is None: + return "**Missing dataset choice input, cannot continue.**" + if format is None: + return "**Missing format choice input, cannot continue.**" + gradientAccumulationSteps = batchSize // microBatchSize + actualLR = float(learningRate) + model = shared.model + tokenizer = shared.tokenizer + tokenizer.pad_token = 0 + tokenizer.padding_side = "left" + # Prep the dataset, format, etc + with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: + formatData: dict[str, str] = json.load(formatFile) + def tokenize(prompt): + result = tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") + return { + "input_ids": result["input_ids"][:-1], + "attention_mask": result["attention_mask"][:-1], + } + def generate_prompt(data_point: dict[str, str]): + for options, data in formatData.items(): + if set(options.split(',')) == set(data_point.keys()): + for key, val in data_point.items(): + data = data.replace(f'%{key}%', val) + return data + raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(formatData.keys())}"') + def generate_and_tokenize_prompt(data_point): + prompt = generate_prompt(data_point) + return tokenize(prompt) + data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) + train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) + if evalDataset == 'None': + evalData = None + else: + evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) + evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) + # Start prepping the model itself + model = prepare_model_for_int8_training(model) + config = LoraConfig( + r=loraRank, + lora_alpha=loraAlpha, + # TODO: Should target_modules be configurable? + target_modules=[ "q_proj", "v_proj" ], + lora_dropout=loraDropout, + bias="none", + task_type="CAUSAL_LM" + ) + model = get_peft_model(model, config) + trainer = transformers.Trainer( + model=model, + train_dataset=train_data, + eval_dataset=evalData, + args=transformers.TrainingArguments( + per_device_train_batch_size=microBatchSize, + gradient_accumulation_steps=gradientAccumulationSteps, + # TODO: Should more of these be configurable? Probably. 
+ warmup_steps=100, + num_train_epochs=epochs, + learning_rate=actualLR, + fp16=True, + logging_steps=20, + evaluation_strategy="steps" if evalData is not None else "no", + save_strategy="steps", + eval_steps=200 if evalData is not None else None, + save_steps=200, + output_dir=loraName, + save_total_limit=3, + load_best_model_at_end=True if evalData is not None else False, + # TODO: Enable multi-device support + ddp_find_unused_parameters=None, + ), + data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False), + ) + model.config.use_cache = False + old_state_dict = model.state_dict + model.state_dict = ( + lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict()) + ).__get__(model, type(model)) + if torch.__version__ >= "2" and sys.platform != "win32": + model = torch.compile(model) + # Actually start and run and save at the end + trainer.train() + model.save_pretrained(loraName) + return "Done!" diff --git a/requirements.txt b/requirements.txt index e5b3de69..c93ce671 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,6 @@ rwkv==0.7.0 safetensors==0.3.0 sentencepiece tqdm +peft +datasets git+https://github.com/huggingface/transformers diff --git a/server.py b/server.py index f423e368..cd95d5ef 100644 --- a/server.py +++ b/server.py @@ -8,10 +8,8 @@ from pathlib import Path import gradio as gr -import modules.chat as chat +from modules import chat, shared, ui, training import modules.extensions as extensions_module -import modules.shared as shared -import modules.ui as ui from modules.html_generator import generate_chat_html from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt @@ -443,6 +441,9 @@ def create_interface(): shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500)}') + + with gr.Tab("Training", elem_id="training-tab"): + training.create_train_interface() if shared.args.extensions is not None: extensions_module.create_extensions_block() diff --git a/training/datasets/put-trainer-datasets-here.txt b/training/datasets/put-trainer-datasets-here.txt new file mode 100644 index 00000000..e69de29b diff --git a/training/formats/alpaca-chatbot-format.json b/training/formats/alpaca-chatbot-format.json new file mode 100644 index 00000000..4b38103f --- /dev/null +++ b/training/formats/alpaca-chatbot-format.json @@ -0,0 +1,4 @@ +{ + "instruction,output": "User: %instruction%\nAssistant: %output%", + "instruction,input,output": "User: %instruction%: %input%\nAssistant: %output%" +} diff --git a/training/formats/alpaca-format.json b/training/formats/alpaca-format.json new file mode 100644 index 00000000..dd6df956 --- /dev/null +++ b/training/formats/alpaca-format.json @@ -0,0 +1,4 @@ +{ + "instruction,output": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n%instruction%\n\n### Response:\n%output%", + "instruction,input,output": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n%instruction%\n\n### Input:\n%input%\n\n### Response:\n%output%" +} diff --git a/training/formats/put-trainer-formats-here.txt b/training/formats/put-trainer-formats-here.txt new file mode 100644 index 00000000..e69de29b From 7bf601107c1b9aebd5bbbb5d08aa3d20c697daf1 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:28:46 -0700 Subject: [PATCH 09/80] automatically strip empty data entries (for better alpaca dataset compat) --- modules/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 96cd6e7c..e2be18e8 100644 --- a/modules/training.py +++ b/modules/training.py @@ -74,7 +74,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le } def generate_prompt(data_point: dict[str, str]): for options, data in formatData.items(): - if set(options.split(',')) == set(data_point.keys()): + if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): for key, val in data_point.items(): data = data.replace(f'%{key}%', val) return data From 5c49a0dcd02c3cf2e31a00fdaf554f36895276d7 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:37:32 -0700 Subject: [PATCH 10/80] fix error from prepare call running twice in a row --- modules/training.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index e2be18e8..0e210c52 100644 --- a/modules/training.py +++ b/modules/training.py @@ -90,7 +90,8 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) # Start prepping the model itself - model = prepare_model_for_int8_training(model) + if not hasattr(model, 'lm_head') or hasattr(model.lm_head, 'weight'): + model = prepare_model_for_int8_training(model) config = LoraConfig( r=loraRank, lora_alpha=loraAlpha, From 8da237223ed008c418386a805524929ddebb59ba Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" 
Date: Sat, 25 Mar 2023 12:48:35 -0700 Subject: [PATCH 11/80] document options better --- modules/training.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/training.py b/modules/training.py index 0e210c52..250093a0 100644 --- a/modules/training.py +++ b/modules/training.py @@ -14,27 +14,27 @@ def get_json_dataset(path: str): def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") - # TODO: Add explanations of batch sizes and recommendations. Note that batch/microBatch determines gradient accumulation and explain what that means. Note the effects on VRAM usage from changing these values. - microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='(TODO)') - batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='(TODO)') + # TODO: Implement multi-device support. + microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') epochs = gr.Slider(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') - # TODO: Better explain what this does. + # TODO: Better explain what this does, in terms of real world effect especially. loraDropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') cutoffLen = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') with gr.Row(): datasetFunction = get_json_dataset('training/datasets') - dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset') + dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset', info='The dataset file to use for training.') ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') with gr.Row(): - evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset') + evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') with gr.Row(): formatsFunction = get_json_dataset('training/formats') - format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format') + format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') startButton = gr.Button("Start LoRA Training") output = gr.Markdown(value="(...)") From f1ba2196b1a640bd094623120486b847ca59ccf5 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sat, 25 Mar 2023 12:57:36 -0700 Subject: [PATCH 12/80] make 'model' variables less ambiguous --- modules/training.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/modules/training.py b/modules/training.py index 250093a0..f9f0790f 100644 --- a/modules/training.py +++ b/modules/training.py @@ -59,15 +59,13 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le return "**Missing format choice input, cannot continue.**" gradientAccumulationSteps = batchSize // microBatchSize actualLR = float(learningRate) - model = shared.model - tokenizer = shared.tokenizer - tokenizer.pad_token = 0 - tokenizer.padding_side = "left" + shared.tokenizer.pad_token = 0 + shared.tokenizer.padding_side = "left" # Prep the dataset, format, etc with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: formatData: dict[str, str] = json.load(formatFile) def tokenize(prompt): - result = tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") + result = shared.tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") return { "input_ids": result["input_ids"][:-1], "attention_mask": result["attention_mask"][:-1], @@ -90,8 +88,8 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) # Start prepping the model itself - if not hasattr(model, 'lm_head') or hasattr(model.lm_head, 'weight'): - model = prepare_model_for_int8_training(model) + if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): + prepare_model_for_int8_training(shared.model) config = LoraConfig( r=loraRank, lora_alpha=loraAlpha, @@ -101,9 +99,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le bias="none", task_type="CAUSAL_LM" ) - model = get_peft_model(model, config) + loraModel = get_peft_model(shared.model, config) trainer = 
transformers.Trainer( - model=model, + model=loraModel, train_dataset=train_data, eval_dataset=evalData, args=transformers.TrainingArguments( @@ -125,16 +123,16 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le # TODO: Enable multi-device support ddp_find_unused_parameters=None, ), - data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False), + data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), ) - model.config.use_cache = False - old_state_dict = model.state_dict - model.state_dict = ( + loraModel.config.use_cache = False + old_state_dict = loraModel.state_dict + loraModel.state_dict = ( lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict()) - ).__get__(model, type(model)) + ).__get__(loraModel, type(loraModel)) if torch.__version__ >= "2" and sys.platform != "win32": - model = torch.compile(model) + loraModel = torch.compile(loraModel) # Actually start and run and save at the end trainer.train() - model.save_pretrained(loraName) + loraModel.save_pretrained(loraName) return "Done!" From 0bac80d9ebf03d91ed5b8f921be03debc3c65cee Mon Sep 17 00:00:00 2001 From: Sean Fitzgerald Date: Sat, 25 Mar 2023 13:08:45 -0700 Subject: [PATCH 13/80] Potential fix for issues/571 --- modules/text_generation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index fd017e2c..eb8f6ca1 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -236,8 +236,6 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi break yield formatted_outputs(reply, shared.model_name) - yield formatted_outputs(reply, shared.model_name) - # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria' else: for i in range(max_new_tokens//8+1): From 9ff6a538b6055b6845efd2f0e625386a847945eb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 26 Mar 2023 22:11:19 -0300 Subject: [PATCH 14/80] Bump gradio version Make sure to upgrade with `pip install -r requirements.txt --upgrade` --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e5b3de69..c84f2948 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ accelerate==0.17.1 bitsandbytes==0.37.1 flexgen==0.1.7 -gradio==3.18.0 +gradio==3.23.0 markdown numpy peft==0.2.0 From 1c77fdca4cdfca5c636595a8aaaff3281b859d3a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 26 Mar 2023 22:20:30 -0300 Subject: [PATCH 15/80] Change notebook mode appearance --- css/chat.css | 6 ++++++ css/main.css | 10 ++++++++++ css/main.js | 2 +- modules/extensions.py | 4 ++-- server.py | 25 ++++++++++++++----------- 5 files changed, 33 insertions(+), 14 deletions(-) diff --git a/css/chat.css b/css/chat.css index 8d9d88a6..1e703530 100644 --- a/css/chat.css +++ b/css/chat.css @@ -23,3 +23,9 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .pending.svelte-1ed2p3z { opacity: 1; } + +#extensions { + padding: 0; + padding: 0; +} + diff --git a/css/main.css b/css/main.css index 09f3b6a8..97879f01 100644 --- a/css/main.css +++ b/css/main.css @@ -54,3 +54,13 @@ ol li p, ul li p { .gradio-container-3-18-0 .prose * h1, h2, h3, h4 { color: white; } + +.gradio-container { + max-width: 100% !important; + padding-top: 0 !important; +} + +#extensions { + padding: 15px; + padding: 15px; +} diff --git 
a/css/main.js b/css/main.js index 9db3fe8b..029ecb62 100644 --- a/css/main.js +++ b/css/main.js @@ -11,7 +11,7 @@ let extensions = document.getElementById('extensions'); main_parent.addEventListener('click', function(e) { // Check if the main element is visible if (main.offsetHeight > 0 && main.offsetWidth > 0) { - extensions.style.display = 'block'; + extensions.style.display = 'flex'; } else { extensions.style.display = 'none'; } diff --git a/modules/extensions.py b/modules/extensions.py index c55dc978..c3cf4de4 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -63,8 +63,8 @@ def create_extensions_block(): # Creating the extension ui elements if should_display_ui: - with gr.Box(elem_id="extensions"): - gr.Markdown("Extensions") + with gr.Column(elem_id="extensions"): for extension, name in iterator(): + gr.Markdown(f"\n### {name}") if hasattr(extension, "ui"): extension.ui() diff --git a/server.py b/server.py index f1b95a5b..56bb499d 100644 --- a/server.py +++ b/server.py @@ -369,19 +369,22 @@ def create_interface(): elif shared.args.notebook: with gr.Tab("Text generation", elem_id="main"): - with gr.Tab('Raw'): - shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Column(scale=4): + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_id="textbox", lines=25) + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() - create_model_and_preset_menus() + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Column(scale=1): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + + create_model_and_preset_menus() with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) From 95c97e1747f277e62db997da73556a94904c1f9c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 26 Mar 2023 23:47:29 -0300 Subject: [PATCH 16/80] Unload the model using the "Remove all" button --- server.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/server.py b/server.py index 3e31377c..db83b4f3 100644 --- a/server.py +++ b/server.py @@ -50,26 +50,20 @@ def get_available_softprompts(): def get_available_loras(): return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) +def unload_model(): + shared.model = shared.tokenizer = None + clear_torch_cache() + def load_model_wrapper(selected_model): if selected_model != shared.model_name: shared.model_name = selected_model - shared.model = shared.tokenizer = None - clear_torch_cache() - shared.model, shared.tokenizer = load_model(shared.model_name) + + unload_model() + if selected_model != '': + shared.model, shared.tokenizer = 
load_model(shared.model_name) return selected_model -def reload_model(): - unload_model() - shared.model, shared.tokenizer = load_model(shared.model_name) - -def unload_model(): - shared.model = shared.tokenizer = None - if not shared.args.cpu: - gc.collect() - torch.cuda.empty_cache() - print("Model weights unloaded.") - def load_lora_wrapper(selected_lora): add_lora_to_model(selected_lora) default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] @@ -128,9 +122,6 @@ def create_model_and_preset_menus(): with gr.Row(): shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') - with gr.Row(): - shared.gradio['unload_model'] = gr.Button(value='Unload model to free VRAM', elem_id="unload_model") - shared.gradio['reload_model'] = gr.Button(value='Reload the model into VRAM', elem_id="reload_model") def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) @@ -185,8 +176,6 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['unload_model'].click(fn=unload_model,inputs=[],outputs=[]) - shared.gradio['reload_model'].click(fn=reload_model,inputs=[],outputs=[]) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) From 3dc61284d581bdf688731cb68f1dc3fa47ae59d1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 00:04:43 -0300 Subject: [PATCH 17/80] Handle unloading LoRA from dropdown menu icon --- modules/LoRA.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 283fcf4c..f5dfe4ed 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -18,11 +18,11 @@ def add_lora_to_model(lora_name): # If a LoRA had been previously loaded, or if we want # to unload a LoRA, reload the model - if shared.lora_name != "None" or lora_name == "None": + if shared.lora_name not in ['None', ''] or lora_name in ['None', '']: reload_model() shared.lora_name = lora_name - if lora_name != "None": + if lora_name not in ['None', '']: print(f"Adding the LoRA {lora_name} to the model...") params = {} if not shared.args.cpu: From 
57345b8f30fcaf5eef2a5be2b4c239d51750a6ba Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:16:37 -0300 Subject: [PATCH 18/80] Add prompt loading/saving menus + reorganize interface --- prompts/Alpaca.txt | 6 ++++ prompts/Open Assistant.txt | 1 + prompts/QA.txt | 4 +++ server.py | 67 +++++++++++++++++++++++++++++--------- 4 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 prompts/Alpaca.txt create mode 100644 prompts/Open Assistant.txt create mode 100644 prompts/QA.txt diff --git a/prompts/Alpaca.txt b/prompts/Alpaca.txt new file mode 100644 index 00000000..8434a80c --- /dev/null +++ b/prompts/Alpaca.txt @@ -0,0 +1,6 @@ +Below is an instruction that describes a task. Write a response that appropriately completes the request. +### Instruction: +Write a poem about the transformers Python library. +Mention the word "large language models" in that poem. +### Response: + diff --git a/prompts/Open Assistant.txt b/prompts/Open Assistant.txt new file mode 100644 index 00000000..cf1ae4a2 --- /dev/null +++ b/prompts/Open Assistant.txt @@ -0,0 +1 @@ +<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|> diff --git a/prompts/QA.txt b/prompts/QA.txt new file mode 100644 index 00000000..32b0e235 --- /dev/null +++ b/prompts/QA.txt @@ -0,0 +1,4 @@ +Common sense questions and answers + +Question: +Factual answer: diff --git a/server.py b/server.py index db83b4f3..b789ab16 100644 --- a/server.py +++ b/server.py @@ -4,6 +4,7 @@ import re import sys import time import zipfile +from datetime import datetime from pathlib import Path import gradio as gr @@ -38,6 +39,13 @@ def get_available_models(): def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) +def get_available_prompts(): + prompts = [] + prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('[0-9]*.txt'))), key=str.lower, reverse=True) + prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('*.txt'))), key=str.lower) + prompts += ['None'] + return prompts + def get_available_characters(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('characters').glob('*.json'))), key=str.lower) @@ -98,7 +106,7 @@ def load_preset_values(preset_menu, return_dict=False): if return_dict: return generate_params else: - return preset_menu, generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] + return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] def upload_soft_prompt(file): with zipfile.ZipFile(io.BytesIO(file)) as zf: @@ -123,9 +131,43 @@ def create_model_and_preset_menus(): 
shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') +def save_prompt(text): + fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt" + with open(Path(f'prompts/{fname}'), 'w', encoding='utf-8') as f: + f.write(text) + return f"Saved prompt to prompts/{fname}" + +def load_prompt(fname): + if fname in ['None', '']: + return '' + else: + with open(Path(f'prompts/{fname}.txt'), 'r', encoding='utf-8') as f: + return f.read() + +def create_prompt_menus(): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['prompt_menu'] = gr.Dropdown(choices=get_available_prompts(), value='None', label='Prompt') + ui.create_refresh_button(shared.gradio['prompt_menu'], lambda : None, lambda : {'choices': get_available_prompts()}, 'refresh-button') + + with gr.Column(): + with gr.Column(): + shared.gradio['save_prompt'] = gr.Button('Save prompt') + shared.gradio['status'] = gr.Markdown('Ready') + + shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=True) + shared.gradio['save_prompt'].click(save_prompt, [shared.gradio['textbox']], [shared.gradio['status']], show_progress=False) + def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) + with gr.Row(): + with gr.Column(): + create_model_and_preset_menus() + with gr.Column(): + shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)') + with gr.Row(): with gr.Column(): with gr.Box(): @@ -156,12 +198,6 @@ def create_settings_menus(default_preset): shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)') - - with gr.Row(): - shared.gradio['preset_menu_mirror'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') - ui.create_refresh_button(shared.gradio['preset_menu_mirror'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') - with gr.Row(): shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') @@ -176,8 +212,7 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) - shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 
'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) + shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -265,8 +300,8 @@ def create_interface(): shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") with gr.Row(): shared.gradio['Impersonate'] = gr.Button('Impersonate') shared.gradio['Regenerate'] = gr.Button('Regenerate') @@ -279,8 +314,6 @@ def create_interface(): shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) - create_model_and_preset_menus() - with gr.Tab("Character", elem_id="chat-settings"): shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') @@ -384,12 +417,15 @@ def create_interface(): shared.gradio['html'] = gr.HTML() with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop') + with gr.Column(scale=1): + gr.Markdown("\n") shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - create_model_and_preset_menus() + create_prompt_menus() + with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) @@ -413,7 +449,7 @@ def create_interface(): with gr.Column(): shared.gradio['Stop'] = gr.Button('Stop') - create_model_and_preset_menus() + create_prompt_menus() with gr.Column(): with gr.Tab('Raw'): @@ -422,6 +458,7 @@ def create_interface(): shared.gradio['markdown'] = gr.Markdown() with gr.Tab('HTML'): shared.gradio['html'] = gr.HTML() + with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) From 8e2d94a5a1a8252131715d7dfe068fc8e49d9aaf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:21:19 -0300 Subject: [PATCH 19/80] Add saved promtps to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 36852916..a9c47a5a 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ repositories settings.json img_bot* img_me* +prompts/[0-9]* From 
202e981d00755e45850e3f428e24dc9e8be75b0c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:30:57 -0300 Subject: [PATCH 20/80] Make Generate/Stop buttons smaller in notebook mode --- server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index b789ab16..27525e43 100644 --- a/server.py +++ b/server.py @@ -417,8 +417,12 @@ def create_interface(): shared.gradio['html'] = gr.HTML() with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['Stop'] = gr.Button('Stop') + with gr.Column(): + with gr.Row(): + shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop') + with gr.Column(): + pass with gr.Column(scale=1): gr.Markdown("\n") From d911c22af9019312eb05f3981ffee22c7243f1d8 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 08:31:49 -0700 Subject: [PATCH 21/80] use shared rows to make the LoRA Trainer interface a bit more compact / clean --- modules/training.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/training.py b/modules/training.py index f9f0790f..aa085fda 100644 --- a/modules/training.py +++ b/modules/training.py @@ -14,11 +14,13 @@ def get_json_dataset(path: str): def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") - # TODO: Implement multi-device support. - microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') - epochs = gr.Slider(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') - learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + with gr.Row(): + # TODO: Implement multi-device support. + microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + with gr.Row(): + epochs = gr.Number(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. 
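# [Editor's note: illustrative sketch, not part of the patch series.]
# The two batch-size sliders introduced in this trainer UI encode the
# gradient-accumulation arithmetic that do_train() applies later in the series
# (gradientAccum = batch / microBatch). A minimal standalone example using the
# slider defaults shown here:
batch_size = 128       # "Batch Size" slider default (global batch)
micro_batch_size = 4   # "Micro Batch Size" slider default (per-device batch)

gradient_accumulation_steps = batch_size // micro_batch_size
print(gradient_accumulation_steps)  # -> 32
# 32 micro-batches of 4 samples are accumulated before each optimizer update,
# giving an effective global batch of 32 * 4 = 128.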
loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') @@ -29,10 +31,8 @@ def create_train_interface(): datasetFunction = get_json_dataset('training/datasets') dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset', info='The dataset file to use for training.') ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - with gr.Row(): evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - with gr.Row(): formatsFunction = get_json_dataset('training/formats') format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') From 2afe1c13c143dd3e8c2d63c15fb8c1ef59895448 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 08:32:32 -0700 Subject: [PATCH 22/80] move Training to before Interface mode as Interface Mode seems to be a core 'settings' page that naturally belongs at the very end --- server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index 03158ac6..0e512c7b 100644 --- a/server.py +++ b/server.py @@ -468,6 +468,9 @@ def create_interface(): shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + with gr.Tab("Training", elem_id="training-tab"): + training.create_train_interface() + with gr.Tab("Interface mode", elem_id="interface-mode"): modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" @@ -488,9 +491,6 @@ def create_interface(): shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'
Reloading...
\'; setTimeout(function(){location.reload()},2500)}') - with gr.Tab("Training", elem_id="training-tab"): - training.create_train_interface() - if shared.args.extensions is not None: extensions_module.create_extensions_block() From 572bafcd24099553cd432e4a695a20050386f8c9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:43:37 -0300 Subject: [PATCH 23/80] Less verbose message --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 27525e43..020093ee 100644 --- a/server.py +++ b/server.py @@ -135,7 +135,7 @@ def save_prompt(text): fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt" with open(Path(f'prompts/{fname}'), 'w', encoding='utf-8') as f: f.write(text) - return f"Saved prompt to prompts/{fname}" + return f"Saved to prompts/{fname}" def load_prompt(fname): if fname in ['None', '']: From addb9777f9130f6cef773db68992bfac7cff8058 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 12:59:07 -0300 Subject: [PATCH 24/80] Increase size of GALACTICA equations --- css/main.css | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/css/main.css b/css/main.css index 97879f01..3f044094 100644 --- a/css/main.css +++ b/css/main.css @@ -37,12 +37,6 @@ text-decoration: none !important; } -svg { - display: unset !important; - vertical-align: middle !important; - margin: 5px; -} - ol li p, ul li p { display: inline-block; } @@ -64,3 +58,8 @@ ol li p, ul li p { padding: 15px; padding: 15px; } + +span.math.inline { + font-size: 27px; + vertical-align: baseline !important; +} From af65c129008c7b84a933247867673575de07ae33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 13:23:59 -0300 Subject: [PATCH 25/80] Change Stop button behavior --- modules/callbacks.py | 2 +- modules/chat.py | 4 ---- modules/text_generation.py | 4 ++++ server.py | 9 +++++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/modules/callbacks.py b/modules/callbacks.py index 8d30d615..d85f406d 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -54,7 +54,7 @@ class Iteratorize: self.stop_now = False def _callback(val): - if self.stop_now: + if self.stop_now or shared.stop_everything: raise ValueError self.q.put(val) diff --git a/modules/chat.py b/modules/chat.py index 1a43cf3d..cc3c45c7 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -80,11 +80,7 @@ def extract_message_from_reply(reply, name1, name2, check): reply = fix_newlines(reply) return reply, next_character_found -def stop_everything_event(): - shared.stop_everything = True - def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False): - shared.stop_everything = False just_started = True eos_token = '\n' if check else None name1_original = name1 diff --git a/modules/text_generation.py b/modules/text_generation.py index 9b2c233d..477257c2 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -99,9 +99,13 @@ def set_manual_seed(seed): if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) +def stop_everything_event(): + shared.stop_everything = True + def generate_reply(question, 
max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, eos_token=None, stopping_strings=[]): clear_torch_cache() set_manual_seed(seed) + shared.stop_everything = False t0 = time.time() original_question = question diff --git a/server.py b/server.py index 020093ee..9f90c79b 100644 --- a/server.py +++ b/server.py @@ -16,7 +16,8 @@ import modules.ui as ui from modules.html_generator import generate_chat_html from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt -from modules.text_generation import clear_torch_cache, generate_reply +from modules.text_generation import (clear_torch_cache, generate_reply, + stop_everything_event) # Loading custom settings settings_file = None @@ -366,7 +367,7 @@ def create_interface(): gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events, queue=False) + shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream) shared.gradio['Replace last reply'].click(chat.replace_last_reply, [shared.gradio['textbox'], shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'], show_progress=shared.args.no_stream) @@ -437,7 +438,7 @@ def create_interface(): output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") else: @@ -471,7 +472,7 @@ def create_interface(): gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") with gr.Tab("Interface mode", elem_id="interface-mode"): From 
c07bcd0850ce0312826f6195450a3b04eb1788f8 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 09:41:06 -0700 Subject: [PATCH 26/80] add some outputs to indicate progress updates (sorta) Actual progressbar still needed. Also minor formatting fixes. --- modules/training.py | 15 ++++++++++++--- server.py | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/modules/training.py b/modules/training.py index aa085fda..b9f3d192 100644 --- a/modules/training.py +++ b/modules/training.py @@ -19,7 +19,7 @@ def create_train_interface(): microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') with gr.Row(): - epochs = gr.Number(label='Epochs', value=1, minimum=1, maximum=1000, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + epochs = gr.Number(label='Epochs', value=1, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') @@ -50,6 +50,7 @@ def cleanPath(basePath: str, path: str): return f'{Path(basePath).absolute()}/{path}' def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): + yield "Prepping..." 
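# [Editor's note: illustrative sketch, not part of the patch series.]
# The yield statements this patch adds to do_train() rely on Gradio treating a
# generator event handler as a stream: each yielded string refreshes the bound
# output component (the gr.Markdown status field wired up via
# startButton.click(do_train, [...], [output])). A minimal, hypothetical
# reproduction of that pattern — the names below are examples, not repo code:
import time

import gradio as gr

def long_job():
    yield "Prepping..."   # stands in for dataset loading / model prep
    time.sleep(1)
    yield "Running..."    # stands in for trainer.train()
    time.sleep(1)
    yield "Done!"

with gr.Blocks() as demo:
    start = gr.Button("Start")
    status = gr.Markdown("Ready")
    start.click(long_job, [], [status])

# demo.queue().launch()  # streamed generator output may require the queue to be enabled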
# Input validation / processing # TODO: --lora-dir PR once pulled will need to be applied here loraName = f"loras/{cleanPath(None, loraName)}" @@ -80,6 +81,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le def generate_and_tokenize_prompt(data_point): prompt = generate_prompt(data_point) return tokenize(prompt) + print("Loading datasets...") data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) if evalDataset == 'None': @@ -89,7 +91,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) # Start prepping the model itself if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): + print("Getting model ready...") prepare_model_for_int8_training(shared.model) + print("Prepping for training...") config = LoraConfig( r=loraRank, lora_alpha=loraAlpha, @@ -121,7 +125,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le save_total_limit=3, load_best_model_at_end=True if evalData is not None else False, # TODO: Enable multi-device support - ddp_find_unused_parameters=None, + ddp_find_unused_parameters=None ), data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), ) @@ -133,6 +137,11 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le if torch.__version__ >= "2" and sys.platform != "win32": loraModel = torch.compile(loraModel) # Actually start and run and save at the end + # TODO: save/load checkpoints to resume from? + print("Starting training...") + yield "Running..." trainer.train() + print("Training complete, saving...") loraModel.save_pretrained(loraName) - return "Done!" + print("Training complete!") + yield f"Done! Lora saved to `{loraName}`" diff --git a/server.py b/server.py index 0e512c7b..caca85c9 100644 --- a/server.py +++ b/server.py @@ -490,7 +490,7 @@ def create_interface(): shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'
Reloading...
\'; setTimeout(function(){location.reload()},2500)}') - + if shared.args.extensions is not None: extensions_module.create_extensions_block() From 268abd1cbabf00ca841efc85211428b3a7f54680 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 13:52:12 -0300 Subject: [PATCH 27/80] Add some space in notebook mode --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 9f90c79b..36083cc1 100644 --- a/server.py +++ b/server.py @@ -426,7 +426,7 @@ def create_interface(): pass with gr.Column(scale=1): - gr.Markdown("\n") + gr.HTML('
') shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) create_prompt_menus() From 8fc723fc95d82755ae9280d9a8fe8b6feb804b1e Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 10:25:08 -0700 Subject: [PATCH 28/80] initial progress tracker in UI --- modules/training.py | 48 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/modules/training.py b/modules/training.py index b9f3d192..c83427d6 100644 --- a/modules/training.py +++ b/modules/training.py @@ -1,4 +1,4 @@ -import sys, torch, json +import sys, torch, json, threading, time from pathlib import Path import gradio as gr from datasets import load_dataset @@ -6,6 +6,9 @@ import transformers from modules import ui, shared from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict +CURRENT_STEPS = 0 +MAX_STEPS = 0 + def get_json_dataset(path: str): def get_set(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) @@ -40,6 +43,12 @@ def create_train_interface(): output = gr.Markdown(value="(...)") startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) +class Callbacks(transformers.TrainerCallback): + def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): + global CURRENT_STEPS, MAX_STEPS + CURRENT_STEPS = state.global_step + MAX_STEPS = state.max_steps + def cleanPath(basePath: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. @@ -50,8 +59,11 @@ def cleanPath(basePath: str, path: str): return f'{Path(basePath).absolute()}/{path}' def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): + global CURRENT_STEPS, MAX_STEPS + CURRENT_STEPS = 0 + MAX_STEPS = 0 yield "Prepping..." 
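# [Editor's note: illustrative sketch, not part of the patch series.]
# The progress tracker added here leans on the transformers callback API: a
# TrainerCallback receives the TrainerState on every optimizer step and can
# mirror global_step / max_steps into module-level counters that the UI's
# polling loop reads. A pared-down sketch of that idea (Trainer construction
# omitted):
import transformers

CURRENT_STEPS = 0
MAX_STEPS = 0

class ProgressCallback(transformers.TrainerCallback):
    def on_step_begin(self, args, state, control, **kwargs):
        # state.global_step counts optimizer steps taken so far;
        # state.max_steps is the planned total for the run.
        global CURRENT_STEPS, MAX_STEPS
        CURRENT_STEPS = state.global_step
        MAX_STEPS = state.max_steps

# The callback is handed to the trainer, e.g.:
# trainer = transformers.Trainer(..., callbacks=[ProgressCallback()])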
- # Input validation / processing + # == Input validation / processing == # TODO: --lora-dir PR once pulled will need to be applied here loraName = f"loras/{cleanPath(None, loraName)}" if dataset is None: @@ -62,7 +74,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le actualLR = float(learningRate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" - # Prep the dataset, format, etc + # == Prep the dataset, format, etc == with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: formatData: dict[str, str] = json.load(formatFile) def tokenize(prompt): @@ -89,7 +101,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le else: evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) - # Start prepping the model itself + # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): print("Getting model ready...") prepare_model_for_int8_training(shared.model) @@ -128,6 +140,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le ddp_find_unused_parameters=None ), data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), + callbacks=list([Callbacks()]) ) loraModel.config.use_cache = False old_state_dict = loraModel.state_dict @@ -136,12 +149,31 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le ).__get__(loraModel, type(loraModel)) if torch.__version__ >= "2" and sys.platform != "win32": loraModel = torch.compile(loraModel) - # Actually start and run and save at the end + # == Main run and monitor loop == # TODO: save/load checkpoints to resume from? print("Starting training...") - yield "Running..." - trainer.train() + yield "Starting..." + def threadedRun(): + trainer.train() + thread = threading.Thread(target=threadedRun) + thread.start() + lastStep = 0 + startTime = time.perf_counter() + while thread.is_alive(): + time.sleep(0.5) + if CURRENT_STEPS != lastStep: + lastStep = CURRENT_STEPS + timeElapsed = time.perf_counter() - startTime + if timeElapsed <= 0: + timerInfo = "" + else: + its = CURRENT_STEPS / timeElapsed + if its > 1: + timerInfo = f"`{its:.2f}` it/s" + else: + timerInfo = f"`{1.0/its:.2f}` s/it" + yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.1f}` seconds" print("Training complete, saving...") loraModel.save_pretrained(loraName) print("Training complete!") - yield f"Done! Lora saved to `{loraName}`" + yield f"Done! 
LoRA saved to `{loraName}`" From 16ea4fc36df9ec0cde796eaecf22db64c4d91fd8 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 10:43:01 -0700 Subject: [PATCH 29/80] interrupt button --- modules/training.py | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/modules/training.py b/modules/training.py index c83427d6..19f33220 100644 --- a/modules/training.py +++ b/modules/training.py @@ -6,8 +6,10 @@ import transformers from modules import ui, shared from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict +WANT_INTERRUPT = False CURRENT_STEPS = 0 MAX_STEPS = 0 +CURRENT_GRADIENT_ACCUM = 1 def get_json_dataset(path: str): def get_set(): @@ -39,15 +41,31 @@ def create_train_interface(): formatsFunction = get_json_dataset('training/formats') format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') - startButton = gr.Button("Start LoRA Training") + with gr.Row(): + startButton = gr.Button("Start LoRA Training") + stopButton = gr.Button("Interrupt") output = gr.Markdown(value="(...)") - startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) + startEvent = startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) + stopButton.click(doInterrupt, [], [], cancels=[], queue=False) + +def doInterrupt(): + global WANT_INTERRUPT + WANT_INTERRUPT = True class Callbacks(transformers.TrainerCallback): def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): global CURRENT_STEPS, MAX_STEPS - CURRENT_STEPS = state.global_step - MAX_STEPS = state.max_steps + CURRENT_STEPS = state.global_step * CURRENT_GRADIENT_ACCUM + MAX_STEPS = state.max_steps * CURRENT_GRADIENT_ACCUM + if WANT_INTERRUPT: + control.should_epoch_stop = True + control.should_training_stop = True + def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): + global CURRENT_STEPS + CURRENT_STEPS += 1 + if WANT_INTERRUPT: + control.should_epoch_stop = True + control.should_training_stop = True def cleanPath(basePath: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" @@ -59,7 +77,8 @@ def cleanPath(basePath: str, path: str): return f'{Path(basePath).absolute()}/{path}' def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): - global CURRENT_STEPS, MAX_STEPS + global WANT_INTERRUPT, CURRENT_STEPS, MAX_STEPS, CURRENT_GRADIENT_ACCUM + WANT_INTERRUPT = False CURRENT_STEPS = 0 MAX_STEPS = 0 yield "Prepping..." 
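# [Editor's note: illustrative sketch, not part of the patch series.]
# The Interrupt button added in this patch is cooperative rather than forceful:
# the click handler only flips a module-level flag, and the TrainerCallback asks
# the Trainer to stop at the next step boundary through the TrainerControl
# object — which is why the status text warns that the run stops only after the
# current training step completes. A reduced sketch of that handshake:
import transformers

WANT_INTERRUPT = False

def request_interrupt():
    # Bound to the "Interrupt" button; returns immediately.
    global WANT_INTERRUPT
    WANT_INTERRUPT = True

class InterruptibleCallback(transformers.TrainerCallback):
    def on_step_begin(self, args, state, control, **kwargs):
        if WANT_INTERRUPT:
            control.should_epoch_stop = True
            control.should_training_stop = True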
@@ -71,6 +90,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le if format is None: return "**Missing format choice input, cannot continue.**" gradientAccumulationSteps = batchSize // microBatchSize + CURRENT_GRADIENT_ACCUM = gradientAccumulationSteps actualLR = float(learningRate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" @@ -161,7 +181,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le startTime = time.perf_counter() while thread.is_alive(): time.sleep(0.5) - if CURRENT_STEPS != lastStep: + if WANT_INTERRUPT: + yield "Interrupting, please wait... *(Run will stop after the current training step completes.)*" + elif CURRENT_STEPS != lastStep: lastStep = CURRENT_STEPS timeElapsed = time.perf_counter() - startTime if timeElapsed <= 0: @@ -175,5 +197,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.1f}` seconds" print("Training complete, saving...") loraModel.save_pretrained(loraName) - print("Training complete!") - yield f"Done! LoRA saved to `{loraName}`" + if WANT_INTERRUPT: + print("Training interrupted.") + yield f"Interrupted. Incomplete LoRA saved to `{loraName}`" + else: + print("Training complete!") + yield f"Done! LoRA saved to `{loraName}`" From 641e1a09a746f3c1172d0a60fb68067d27d903c0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 14:48:43 -0300 Subject: [PATCH 30/80] Don't flash when selecting a new prompt --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 36083cc1..8b26a90f 100644 --- a/server.py +++ b/server.py @@ -157,7 +157,7 @@ def create_prompt_menus(): shared.gradio['save_prompt'] = gr.Button('Save prompt') shared.gradio['status'] = gr.Markdown('Ready') - shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=True) + shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=False) shared.gradio['save_prompt'].click(save_prompt, [shared.gradio['textbox']], [shared.gradio['status']], show_progress=False) def create_settings_menus(default_preset): From 9ced75746de1335e383626d996ced7d0d17e489b Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 10:57:27 -0700 Subject: [PATCH 31/80] add total time estimate --- modules/training.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 19f33220..f8846049 100644 --- a/modules/training.py +++ b/modules/training.py @@ -188,13 +188,15 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le timeElapsed = time.perf_counter() - startTime if timeElapsed <= 0: timerInfo = "" + totalTimeEstimate = 999 else: its = CURRENT_STEPS / timeElapsed if its > 1: timerInfo = f"`{its:.2f}` it/s" else: timerInfo = f"`{1.0/its:.2f}` s/it" - yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.1f}` seconds" + totalTimeEstimate = (1.0/its) * (MAX_STEPS) + yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... 
{timerInfo}, `{timeElapsed:.0f}`/`{totalTimeEstimate:.0f}` seconds" print("Training complete, saving...") loraModel.save_pretrained(loraName) if WANT_INTERRUPT: From 9ec6c56680e96e59b12f68199827c1f8b9510f38 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 15:12:43 -0300 Subject: [PATCH 32/80] Update stale.yml --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 82cd1701..ce603a4f 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/stale@v5 with: stale-issue-message: "" - close-issue-message: "This issue has been closed due to inactivity for 30 days. If you believe it is still relevant, you can reopen it (if you are the author) or leave a comment below." + close-issue-message: "This issue has been closed due to inactivity for 30 days. If you believe it is still relevant, please leave a comment below." days-before-issue-stale: 30 days-before-issue-close: 0 stale-issue-label: "stale" From 9c96919121ca281fb7857c0a503e6141ff94ba84 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:05:19 +0000 Subject: [PATCH 33/80] Bump bitsandbytes from 0.37.1 to 0.37.2 Bumps [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) from 0.37.1 to 0.37.2. - [Release notes](https://github.com/TimDettmers/bitsandbytes/releases) - [Changelog](https://github.com/TimDettmers/bitsandbytes/blob/main/CHANGELOG.md) - [Commits](https://github.com/TimDettmers/bitsandbytes/commits) --- updated-dependencies: - dependency-name: bitsandbytes dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c84f2948..bfec2c9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ accelerate==0.17.1 -bitsandbytes==0.37.1 +bitsandbytes==0.37.2 flexgen==0.1.7 gradio==3.23.0 markdown From e9c0226b092025173ac368d3af3992561b3edef3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:05:35 +0000 Subject: [PATCH 34/80] Bump rwkv from 0.7.0 to 0.7.1 Bumps [rwkv](https://github.com/BlinkDL/ChatRWKV) from 0.7.0 to 0.7.1. - [Release notes](https://github.com/BlinkDL/ChatRWKV/releases) - [Commits](https://github.com/BlinkDL/ChatRWKV/commits) --- updated-dependencies: - dependency-name: rwkv dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c84f2948..257a29b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ markdown numpy peft==0.2.0 requests -rwkv==0.7.0 +rwkv==0.7.1 safetensors==0.3.0 sentencepiece tqdm From 2f0571bfa4a17300113b3e91f422cc8aa5471b4d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:24:39 -0300 Subject: [PATCH 35/80] Small style changes --- css/main.css | 2 +- modules/training.py | 23 ++++++++++++++++++----- server.py | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/css/main.css b/css/main.css index 3f044094..6aa3bc1a 100644 --- a/css/main.css +++ b/css/main.css @@ -41,7 +41,7 @@ ol li p, ul li p { display: inline-block; } -#main, #parameters, #chat-settings, #interface-mode, #lora { +#main, #parameters, #chat-settings, #interface-mode, #lora, #training-tab { border: 0; } diff --git a/modules/training.py b/modules/training.py index f8846049..bc5b3878 100644 --- a/modules/training.py +++ b/modules/training.py @@ -1,10 +1,17 @@ -import sys, torch, json, threading, time +import json +import sys +import threading +import time from pathlib import Path + import gradio as gr -from datasets import load_dataset +import torch import transformers -from modules import ui, shared -from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict +from datasets import load_dataset +from peft import (LoraConfig, get_peft_model, get_peft_model_state_dict, + prepare_model_for_int8_training) + +from modules import shared, ui WANT_INTERRUPT = False CURRENT_STEPS = 0 @@ -44,7 +51,7 @@ def create_train_interface(): with gr.Row(): startButton = gr.Button("Start LoRA Training") stopButton = gr.Button("Interrupt") - output = gr.Markdown(value="(...)") + output = gr.Markdown(value="Ready") startEvent = startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) stopButton.click(doInterrupt, [], [], cancels=[], queue=False) @@ -169,16 +176,20 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le ).__get__(loraModel, type(loraModel)) if torch.__version__ >= "2" and sys.platform != "win32": loraModel = torch.compile(loraModel) + # == Main run and monitor loop == # TODO: save/load checkpoints to resume from? print("Starting training...") yield "Starting..." + def threadedRun(): trainer.train() + thread = threading.Thread(target=threadedRun) thread.start() lastStep = 0 startTime = time.perf_counter() + while thread.is_alive(): time.sleep(0.5) if WANT_INTERRUPT: @@ -197,8 +208,10 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le timerInfo = f"`{1.0/its:.2f}` s/it" totalTimeEstimate = (1.0/its) * (MAX_STEPS) yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.0f}`/`{totalTimeEstimate:.0f}` seconds" + print("Training complete, saving...") loraModel.save_pretrained(loraName) + if WANT_INTERRUPT: print("Training interrupted.") yield f"Interrupted. 
Incomplete LoRA saved to `{loraName}`" diff --git a/server.py b/server.py index cf37dc50..c3c8d2c8 100644 --- a/server.py +++ b/server.py @@ -9,8 +9,8 @@ from pathlib import Path import gradio as gr -from modules import chat, shared, ui, training import modules.extensions as extensions_module +from modules import chat, shared, training, ui from modules.html_generator import generate_chat_html from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt From 6368dad7dbf4a85d840930548bce5a28714f65e5 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 18:17:42 -0700 Subject: [PATCH 36/80] Fix camelCase to snake_case to match repo format standard --- modules/training.py | 132 +++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 58 deletions(-) diff --git a/modules/training.py b/modules/training.py index bc5b3878..f63f2990 100644 --- a/modules/training.py +++ b/modules/training.py @@ -25,35 +25,40 @@ def get_json_dataset(path: str): def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): - loraName = gr.Textbox(label="Name", info="The name of your new LoRA file") + lora_name = gr.Textbox(label="Name", info="The name of your new LoRA file") with gr.Row(): # TODO: Implement multi-device support. - microBatchSize = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batchSize = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batch_size = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + with gr.Row(): epochs = gr.Number(label='Epochs', value=1, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') - learningRate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - loraRank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. 
Higher ranks also require higher VRAM.') - loraAlpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') # TODO: Better explain what this does, in terms of real world effect especially. - loraDropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') - cutoffLen = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') + cutoff_len = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + with gr.Row(): - datasetFunction = get_json_dataset('training/datasets') - dataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - evalDataset = gr.Dropdown(choices=datasetFunction(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') - ui.create_refresh_button(evalDataset, lambda : None, lambda : {'choices': datasetFunction()}, 'refresh-button') - formatsFunction = get_json_dataset('training/formats') - format = gr.Dropdown(choices=formatsFunction(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda : None, lambda : {'choices': formatsFunction()}, 'refresh-button') + dataset_function = get_json_dataset('training/datasets') + dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') + formats_function = get_json_dataset('training/formats') + format = gr.Dropdown(choices=formats_function(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + 
ui.create_refresh_button(format, lambda : None, lambda : {'choices': formats_function()}, 'refresh-button') + with gr.Row(): - startButton = gr.Button("Start LoRA Training") - stopButton = gr.Button("Interrupt") + start_button = gr.Button("Start LoRA Training") + stop_button = gr.Button("Interrupt") + output = gr.Markdown(value="Ready") - startEvent = startButton.click(do_train, [loraName, microBatchSize, batchSize, epochs, learningRate, loraRank, loraAlpha, loraDropout, cutoffLen, dataset, evalDataset, format], [output]) - stopButton.click(doInterrupt, [], [], cancels=[], queue=False) + startEvent = start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) + stop_button.click(doInterrupt, [], [], cancels=[], queue=False) def doInterrupt(): global WANT_INTERRUPT @@ -74,108 +79,119 @@ class Callbacks(transformers.TrainerCallback): control.should_epoch_stop = True control.should_training_stop = True -def cleanPath(basePath: str, path: str): +def cleanPath(base_path: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. # Or swap it to a strict whitelist of [a-zA-Z_0-9] path = path.replace('\\', '/').replace('..', '_') - if basePath is None: + if base_path is None: return path - return f'{Path(basePath).absolute()}/{path}' + return f'{Path(base_path).absolute()}/{path}' -def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, learningRate: float, loraRank: int, loraAlpha: int, loraDropout: float, cutoffLen: int, dataset: str, evalDataset: str, format: str): +def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: float, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str): global WANT_INTERRUPT, CURRENT_STEPS, MAX_STEPS, CURRENT_GRADIENT_ACCUM WANT_INTERRUPT = False CURRENT_STEPS = 0 MAX_STEPS = 0 - yield "Prepping..." + # == Input validation / processing == + yield "Prepping..." 
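# [Editor's note: illustrative sketch, not part of the patch series.]
# clean_path() (renamed from cleanPath here) is a light sanitizer: it normalizes
# backslashes and neuters ".." before anchoring the name under the intended
# directory; the patch's own TODO notes it still deserves a security audit.
# A standalone copy of the same logic, showing what a hostile-looking input
# becomes:
from pathlib import Path

def clean_path_sketch(base_path, path):
    path = path.replace('\\', '/').replace('..', '_')
    if base_path is None:
        return path
    return f'{Path(base_path).absolute()}/{path}'

print(clean_path_sketch('training/datasets', '../../etc/passwd'))
# -> "<absolute training/datasets>/_/_/etc/passwd": the ".." hops are replaced
#    with "_", so the resolved name stays under the datasets directory.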
# TODO: --lora-dir PR once pulled will need to be applied here - loraName = f"loras/{cleanPath(None, loraName)}" + lora_name = f"loras/{cleanPath(None, lora_name)}" if dataset is None: return "**Missing dataset choice input, cannot continue.**" if format is None: return "**Missing format choice input, cannot continue.**" - gradientAccumulationSteps = batchSize // microBatchSize - CURRENT_GRADIENT_ACCUM = gradientAccumulationSteps - actualLR = float(learningRate) + gradient_accumulation_steps = batch_size // micro_batch_size + CURRENT_GRADIENT_ACCUM = gradient_accumulation_steps + actual_lr = float(learning_rate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" + # == Prep the dataset, format, etc == with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: - formatData: dict[str, str] = json.load(formatFile) + format_data: dict[str, str] = json.load(formatFile) + def tokenize(prompt): - result = shared.tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length") + result = shared.tokenizer(prompt, truncation=True, max_length=cutoff_len + 1, padding="max_length") return { "input_ids": result["input_ids"][:-1], "attention_mask": result["attention_mask"][:-1], } + def generate_prompt(data_point: dict[str, str]): - for options, data in formatData.items(): + for options, data in format_data.items(): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): for key, val in data_point.items(): data = data.replace(f'%{key}%', val) return data - raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(formatData.keys())}"') + raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"') + def generate_and_tokenize_prompt(data_point): prompt = generate_prompt(data_point) return tokenize(prompt) + print("Loading datasets...") data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) - if evalDataset == 'None': - evalData = None + + if eval_dataset == 'None': + eval_data = None else: - evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json')) - evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt) + eval_data = load_dataset("json", data_files=cleanPath('training/datasets', f'{eval_dataset}.json')) + eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) + # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): print("Getting model ready...") prepare_model_for_int8_training(shared.model) + print("Prepping for training...") config = LoraConfig( - r=loraRank, - lora_alpha=loraAlpha, + r=lora_rank, + lora_alpha=lora_alpha, # TODO: Should target_modules be configurable? 
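# [Editor's note: illustrative sketch, not part of the patch series.]
# As the "LoRA Alpha" slider text earlier in the series explains, alpha divided
# by rank becomes the scaling applied to the LoRA weights. With the UI defaults
# that flow into this LoraConfig(r=lora_rank, lora_alpha=lora_alpha, ...):
lora_rank = 8     # "LoRA Rank" slider default
lora_alpha = 16   # "LoRA Alpha" slider default

lora_scaling = lora_alpha / lora_rank
print(lora_scaling)  # -> 2.0, matching the suggested "twice your Rank" rule of thumb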
target_modules=[ "q_proj", "v_proj" ], - lora_dropout=loraDropout, + lora_dropout=lora_dropout, bias="none", task_type="CAUSAL_LM" ) - loraModel = get_peft_model(shared.model, config) + lora_model = get_peft_model(shared.model, config) trainer = transformers.Trainer( - model=loraModel, + model=lora_model, train_dataset=train_data, - eval_dataset=evalData, + eval_dataset=eval_data, args=transformers.TrainingArguments( - per_device_train_batch_size=microBatchSize, - gradient_accumulation_steps=gradientAccumulationSteps, + per_device_train_batch_size=micro_batch_size, + gradient_accumulation_steps=gradient_accumulation_steps, # TODO: Should more of these be configurable? Probably. warmup_steps=100, num_train_epochs=epochs, - learning_rate=actualLR, + learning_rate=actual_lr, fp16=True, logging_steps=20, - evaluation_strategy="steps" if evalData is not None else "no", + evaluation_strategy="steps" if eval_data is not None else "no", save_strategy="steps", - eval_steps=200 if evalData is not None else None, + eval_steps=200 if eval_data is not None else None, save_steps=200, - output_dir=loraName, + output_dir=lora_name, save_total_limit=3, - load_best_model_at_end=True if evalData is not None else False, + load_best_model_at_end=True if eval_data is not None else False, # TODO: Enable multi-device support ddp_find_unused_parameters=None ), data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), callbacks=list([Callbacks()]) ) - loraModel.config.use_cache = False - old_state_dict = loraModel.state_dict - loraModel.state_dict = ( + + lora_model.config.use_cache = False + old_state_dict = lora_model.state_dict + lora_model.state_dict = ( lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict()) - ).__get__(loraModel, type(loraModel)) + ).__get__(lora_model, type(lora_model)) + if torch.__version__ >= "2" and sys.platform != "win32": - loraModel = torch.compile(loraModel) + lora_model = torch.compile(lora_model) # == Main run and monitor loop == # TODO: save/load checkpoints to resume from? @@ -210,11 +226,11 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le yield f"Running... **{CURRENT_STEPS}** / **{MAX_STEPS}** ... {timerInfo}, `{timeElapsed:.0f}`/`{totalTimeEstimate:.0f}` seconds" print("Training complete, saving...") - loraModel.save_pretrained(loraName) + lora_model.save_pretrained(lora_name) if WANT_INTERRUPT: print("Training interrupted.") - yield f"Interrupted. Incomplete LoRA saved to `{loraName}`" + yield f"Interrupted. Incomplete LoRA saved to `{lora_name}`" else: print("Training complete!") - yield f"Done! LoRA saved to `{loraName}`" + yield f"Done! 
LoRA saved to `{lora_name}`" From 7fab7ea1b64a262f63535d06b5e418910bed7edd Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 18:19:06 -0700 Subject: [PATCH 37/80] couple missed camelCases --- modules/training.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/training.py b/modules/training.py index f63f2990..e3976d8f 100644 --- a/modules/training.py +++ b/modules/training.py @@ -58,9 +58,9 @@ def create_train_interface(): output = gr.Markdown(value="Ready") startEvent = start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) - stop_button.click(doInterrupt, [], [], cancels=[], queue=False) + stop_button.click(do_interrupt, [], [], cancels=[], queue=False) -def doInterrupt(): +def do_interrupt(): global WANT_INTERRUPT WANT_INTERRUPT = True @@ -79,7 +79,7 @@ class Callbacks(transformers.TrainerCallback): control.should_epoch_stop = True control.should_training_stop = True -def cleanPath(base_path: str, path: str): +def clean_path(base_path: str, path: str): """"Strips unusual symbols and forcibly builds a path as relative to the intended directory.""" # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path. # Or swap it to a strict whitelist of [a-zA-Z_0-9] @@ -97,7 +97,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int # == Input validation / processing == yield "Prepping..." # TODO: --lora-dir PR once pulled will need to be applied here - lora_name = f"loras/{cleanPath(None, lora_name)}" + lora_name = f"loras/{clean_path(None, lora_name)}" if dataset is None: return "**Missing dataset choice input, cannot continue.**" if format is None: @@ -109,7 +109,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int shared.tokenizer.padding_side = "left" # == Prep the dataset, format, etc == - with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile: + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: format_data: dict[str, str] = json.load(formatFile) def tokenize(prompt): @@ -132,13 +132,13 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int return tokenize(prompt) print("Loading datasets...") - data = load_dataset("json", data_files=cleanPath('training/datasets', f'{dataset}.json')) + data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) if eval_dataset == 'None': eval_data = None else: - eval_data = load_dataset("json", data_files=cleanPath('training/datasets', f'{eval_dataset}.json')) + eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json')) eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) # == Start prepping the model itself == From 1e02f75f2bc7917de83b89f2d04de1df157f01e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 01:19:34 +0000 Subject: [PATCH 38/80] Bump accelerate from 0.17.1 to 0.18.0 Bumps [accelerate](https://github.com/huggingface/accelerate) from 0.17.1 to 0.18.0. 
- [Release notes](https://github.com/huggingface/accelerate/releases) - [Commits](https://github.com/huggingface/accelerate/compare/v0.17.1...v0.18.0) --- updated-dependencies: - dependency-name: accelerate dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0c6ed793..77557a74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==0.17.1 +accelerate==0.18.0 bitsandbytes==0.37.2 flexgen==0.1.7 gradio==3.23.0 From 8a97f6ba293228f7e33fd96670db81b0d2001f23 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 18:39:06 -0700 Subject: [PATCH 39/80] corrections per the PR comments --- modules/training.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/modules/training.py b/modules/training.py index e3976d8f..52ecc55e 100644 --- a/modules/training.py +++ b/modules/training.py @@ -19,9 +19,7 @@ MAX_STEPS = 0 CURRENT_GRADIENT_ACCUM = 1 def get_json_dataset(path: str): - def get_set(): - return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) - return get_set + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): @@ -32,7 +30,7 @@ def create_train_interface(): batch_size = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') with gr.Row(): - epochs = gr.Number(label='Epochs', value=1, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. @@ -43,21 +41,19 @@ def create_train_interface(): cutoff_len = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') with gr.Row(): - dataset_function = get_json_dataset('training/datasets') - dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') - eval_dataset = gr.Dropdown(choices=dataset_function(), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') - ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': dataset_function()}, 'refresh-button') - formats_function = get_json_dataset('training/formats') - format = gr.Dropdown(choices=formats_function(), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda : None, lambda : {'choices': formats_function()}, 'refresh-button') + dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') + format = gr.Dropdown(choices=get_json_dataset('training/formats'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + ui.create_refresh_button(format, lambda : None, lambda : {'choices': get_json_dataset('training/formats')}, 'refresh-button') with gr.Row(): start_button = gr.Button("Start LoRA Training") stop_button = gr.Button("Interrupt") output = gr.Markdown(value="Ready") - startEvent = start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) + start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) stop_button.click(do_interrupt, [], [], cancels=[], queue=False) def do_interrupt(): From 005f552ea311e9bf932b91337da101a490bdd5ff Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:29:52 -0300 Subject: [PATCH 40/80] Some simplifications --- modules/shared.py | 6 +++--- server.py | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index d9bcf241..71829a01 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -107,14 +107,14 @@ parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile t parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.') parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example. If you create a file called settings.json, this file will be loaded by default without the need to use the --settings flag.') parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. 
If you want to load more than one extension, write the names separated by spaces.') +parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") +parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") +parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.') parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.') parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') -parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3"', default=None) -parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") -parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") args = parser.parse_args() # Provisional, this will be deleted later diff --git a/server.py b/server.py index 15aa84bb..66f60074 100644 --- a/server.py +++ b/server.py @@ -498,16 +498,21 @@ def create_interface(): if shared.args.extensions is not None: extensions_module.create_extensions_block() + # Authentication + auth = None + if shared.args.gradio_auth_path is not None: + gradio_auth_creds = [] + with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: + for line in file.readlines(): + gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] + auth = [tuple(cred.split(':')) for cred in gradio_auth_creds] + # Launch the interface - gradio_auth_creds = [] - with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: - for line in file.readlines(): - gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] shared.gradio['interface'].queue() if shared.args.listen: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=auth) else: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=[tuple(cred.split(':')) for cred in gradio_auth_creds] if gradio_auth_creds else None) + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=auth) create_interface() From 30585b3e716e646ffabb8d590e5fe3b53863656d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:35:01 -0300 Subject: [PATCH 41/80] Update README --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md 
index 3bfbc72f..cd75284c 100644 --- a/README.md +++ b/README.md @@ -198,12 +198,15 @@ Optionally, you can use the following command-line flags: | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | | `--no-stream` | Don't stream the text output in real time. | | `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag.| -| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. | -| `--listen` | Make the web UI reachable from your local network.| -| `--listen-port LISTEN_PORT` | The listening port that the server will use. | -| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | -| `--auto-launch` | Open the web UI in the default browser upon launch. | -| `--verbose` | Print the prompts to the terminal. | +| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. | +| `--model-dir MODEL_DIR` | Path to directory with all the models | +| `--lora-dir LORA_DIR` | Path to directory with all the loras | +| `--verbose` | Print the prompts to the terminal. | +| `--listen` | Make the web UI reachable from your local network. | +| `--listen-port LISTEN_PORT` | The listening port that the server will use. | +| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | +| `--auto-launch` | Open the web UI in the default browser upon launch. | +| `--gradio-auth-path GRADIO_AUTH_PATH` | set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3" | Out of memory errors? [Check the low VRAM guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide). From 036163a75134ba88d83754548b992331d2b450f5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:39:26 -0300 Subject: [PATCH 42/80] Change description --- README.md | 2 +- modules/shared.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cd75284c..f6b1d4f5 100644 --- a/README.md +++ b/README.md @@ -206,7 +206,7 @@ Optionally, you can use the following command-line flags: | `--listen-port LISTEN_PORT` | The listening port that the server will use. | | `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | | `--auto-launch` | Open the web UI in the default browser upon launch. | -| `--gradio-auth-path GRADIO_AUTH_PATH` | set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3" | +| `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" | Out of memory errors? [Check the low VRAM guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide). 
diff --git a/modules/shared.py b/modules/shared.py index 71829a01..ac9d750c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -114,7 +114,7 @@ parser.add_argument('--listen', action='store_true', help='Make the web UI reach parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.') parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') -parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. "/path/to/auth/file" with format like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--gradio-auth-path", type=str, help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"', default=None) args = parser.parse_args() # Provisional, this will be deleted later From ee95e55df67468902fc411bbfc51bb961d1953d2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:42:29 -0300 Subject: [PATCH 43/80] Fix RWKV tokenizer --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index 5aaef800..26a10f7a 100644 --- a/modules/models.py +++ b/modules/models.py @@ -90,7 +90,7 @@ def load_model(model_name): from modules.RWKV import RWKVModel, RWKVTokenizer model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") - tokenizer = RWKVTokenizer.from_pretrained(Path(shared.model_name)) + tokenizer = RWKVTokenizer.from_pretrained(Path(shared.args.model_dir)) return model, tokenizer From 53da672315d3914b1af728274f0223e7bac60b7a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 27 Mar 2023 23:44:21 -0300 Subject: [PATCH 44/80] Fix FlexGen --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index 26a10f7a..a6839318 100644 --- a/modules/models.py +++ b/modules/models.py @@ -76,7 +76,7 @@ def load_model(model_name): num_bits=4, group_size=64, group_dim=2, symmetric=False)) - model = OptLM(f"facebook/{shared.model_name}", env, shared.model_name, policy) + model = OptLM(f"facebook/{shared.model_name}", env, shared.args.model_dir, policy) # DeepSpeed ZeRO-3 elif shared.args.deepspeed: From ec6224f5561ce200ef8c98f967c3f6edafd2ffb0 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 20:04:16 -0700 Subject: [PATCH 45/80] use new shared.args.lora_dir --- modules/training.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/training.py b/modules/training.py index 52ecc55e..0d54a251 100644 --- a/modules/training.py +++ b/modules/training.py @@ -92,8 +92,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int # == Input validation / processing == yield "Prepping..." 
- # TODO: --lora-dir PR once pulled will need to be applied here - lora_name = f"loras/{clean_path(None, lora_name)}" + lora_name = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}" if dataset is None: return "**Missing dataset choice input, cannot continue.**" if format is None: From b749952fe3de309ca1b5ec98fe114608be4c8dce Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 21:22:43 -0700 Subject: [PATCH 46/80] change number minimums to 0 gradio calculates 'step' relative to the minimum, so at '1' the step values were all offset awkwardly. 0 isn't valid, but, uh, just don't slam the slider to the left. --- modules/training.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/training.py b/modules/training.py index 0d54a251..656a8b3a 100644 --- a/modules/training.py +++ b/modules/training.py @@ -27,18 +27,18 @@ def create_train_interface(): with gr.Row(): # TODO: Implement multi-device support. micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batch_size = gr.Slider(label='Batch Size', value=128, minimum=1, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') + batch_size = gr.Slider(label='Batch Size', value=128, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') with gr.Row(): epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=1, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') - lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=1, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. 
Higher means stronger. A good standard value is twice your Rank.') # TODO: Better explain what this does, in terms of real world effect especially. lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') - cutoff_len = gr.Slider(label='Cutoff Length', minimum=1,maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') with gr.Row(): dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Dataset', info='The dataset file to use for training.') From 2e08af4edf07b5b79f3e105c0be892e518da28bd Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:15:32 -0700 Subject: [PATCH 47/80] implement initial Raw Text File Input also bump default Rank & Alpha for values that will make sense in testing if you don't know what you're doing and leave the defaults. --- modules/training.py | 116 +++++++++++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 40 deletions(-) diff --git a/modules/training.py b/modules/training.py index 656a8b3a..1949fa4e 100644 --- a/modules/training.py +++ b/modules/training.py @@ -7,7 +7,7 @@ from pathlib import Path import gradio as gr import torch import transformers -from datasets import load_dataset +from datasets import Dataset, load_dataset from peft import (LoraConfig, get_peft_model, get_peft_model_state_dict, prepare_model_for_int8_training) @@ -18,8 +18,8 @@ CURRENT_STEPS = 0 MAX_STEPS = 0 CURRENT_GRADIENT_ACCUM = 1 -def get_json_dataset(path: str): - return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob('*.json'))), key=str.lower) +def get_dataset(path: str, ext: str): + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path(path).glob(f'*.{ext}'))), key=str.lower) def create_train_interface(): with gr.Tab('Train LoRA', elem_id='lora-train-tab'): @@ -40,20 +40,26 @@ def create_train_interface(): lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') - with gr.Row(): - dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') - eval_dataset = gr.Dropdown(choices=get_json_dataset('training/datasets'), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') - ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': get_json_dataset('training/datasets')}, 'refresh-button') - format = gr.Dropdown(choices=get_json_dataset('training/formats'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda : None, lambda : {'choices': get_json_dataset('training/formats')}, 'refresh-button') + with gr.Tab(label="Formatted Dataset"): + with gr.Row(): + dataset = gr.Dropdown(choices=get_dataset('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'json')}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=get_dataset('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'json')}, 'refresh-button') + format = gr.Dropdown(choices=get_dataset('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + ui.create_refresh_button(format, lambda : None, lambda : {'choices': get_dataset('training/formats', 'json')}, 'refresh-button') + with gr.Tab(label="Raw Text File"): + with gr.Row(): + raw_text_file = gr.Dropdown(choices=get_dataset('training/datasets', 'txt'), value='None', label='Text File', info='The raw text file to use for training.') + ui.create_refresh_button(raw_text_file, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'txt')}, 'refresh-button') + overlap_len = gr.Slider(label='Overlap Length', minimum=0,maximum=512, value=32, step=8, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. 
(The chunks themselves will be of a size determined by Cutoff Length above)') with gr.Row(): start_button = gr.Button("Start LoRA Training") stop_button = gr.Button("Interrupt") output = gr.Markdown(value="Ready") - start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format], [output]) + start_button.click(do_train, [lora_name, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, raw_text_file, overlap_len], [output]) stop_button.click(do_interrupt, [], [], cancels=[], queue=False) def do_interrupt(): @@ -84,7 +90,8 @@ def clean_path(base_path: str, path: str): return path return f'{Path(base_path).absolute()}/{path}' -def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: float, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str): +def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lora_rank: int, + lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, raw_text_file: str, overlap_len: int): global WANT_INTERRUPT, CURRENT_STEPS, MAX_STEPS, CURRENT_GRADIENT_ACCUM WANT_INTERRUPT = False CURRENT_STEPS = 0 @@ -93,20 +100,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int # == Input validation / processing == yield "Prepping..." lora_name = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}" - if dataset is None: - return "**Missing dataset choice input, cannot continue.**" - if format is None: - return "**Missing format choice input, cannot continue.**" + actual_lr = float(learning_rate) + + if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0: + yield f"Cannot input zeroes." 
+ return + gradient_accumulation_steps = batch_size // micro_batch_size CURRENT_GRADIENT_ACCUM = gradient_accumulation_steps - actual_lr = float(learning_rate) shared.tokenizer.pad_token = 0 shared.tokenizer.padding_side = "left" - # == Prep the dataset, format, etc == - with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: - format_data: dict[str, str] = json.load(formatFile) - def tokenize(prompt): result = shared.tokenizer(prompt, truncation=True, max_length=cutoff_len + 1, padding="max_length") return { @@ -114,27 +118,55 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int "attention_mask": result["attention_mask"][:-1], } - def generate_prompt(data_point: dict[str, str]): - for options, data in format_data.items(): - if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): - for key, val in data_point.items(): - data = data.replace(f'%{key}%', val) - return data - raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"') - - def generate_and_tokenize_prompt(data_point): - prompt = generate_prompt(data_point) - return tokenize(prompt) - - print("Loading datasets...") - data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) - train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) - - if eval_dataset == 'None': + # == Prep the dataset, format, etc == + if raw_text_file is not None: + print("Loading raw text file dataset...") + with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file: + raw_text = file.read() + tokens = shared.tokenizer.encode(raw_text) + del raw_text # Note: could be a gig for a large dataset, so delete redundant data as we go to be safe on RAM + tokens = list(split_chunks(tokens, cutoff_len - overlap_len)) + for i in range(1, len(tokens)): + tokens[i] = tokens[i - 1][-overlap_len:] + tokens[i] + text_chunks = [shared.tokenizer.decode(x) for x in tokens] + del tokens + data = Dataset.from_list([tokenize(x) for x in text_chunks]) + train_data = data.shuffle() eval_data = None + del text_chunks + else: - eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json')) - eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: + format_data: dict[str, str] = json.load(formatFile) + + if dataset is None: + yield "**Missing dataset choice input, cannot continue.**" + return + if format is None: + yield "**Missing format choice input, cannot continue.**" + return + + def generate_prompt(data_point: dict[str, str]): + for options, data in format_data.items(): + if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): + for key, val in data_point.items(): + data = data.replace(f'%{key}%', val) + return data + raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"') + + def generate_and_tokenize_prompt(data_point): + prompt = generate_prompt(data_point) + return tokenize(prompt) + + print("Loading JSON datasets...") + data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json')) + train_data = data['train'].shuffle().map(generate_and_tokenize_prompt) + + if eval_dataset == 'None': + eval_data = None + else: + eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json')) 
+ eval_data = eval_data['train'].shuffle().map(generate_and_tokenize_prompt) # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): @@ -229,3 +261,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int else: print("Training complete!") yield f"Done! LoRA saved to `{lora_name}`" + +def split_chunks(arr, step): + for i in range(0, len(arr), step): + yield arr[i:i + step] From 9cc811a0e6abbc32ef5699255db4127740ea1e8d Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:16:40 -0700 Subject: [PATCH 48/80] fix LoRA path typo in #549 --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 311a624f..4d9ee5e8 100644 --- a/server.py +++ b/server.py @@ -55,7 +55,7 @@ def get_available_softprompts(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) def get_available_loras(): - return ['None'] + sorted([item.name for item in list(Path('shared.args.lora_dir').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return ['None'] + sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def unload_model(): shared.model = shared.tokenizer = None From e817fac5424f7f19a5f20071dc08ce4e483d0636 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:29:23 -0700 Subject: [PATCH 49/80] better defaults --- modules/training.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/training.py b/modules/training.py index 1949fa4e..7bcecb38 100644 --- a/modules/training.py +++ b/modules/training.py @@ -34,8 +34,8 @@ def create_train_interface(): learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') - lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') + lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, high values like 128 or 256 are good for teaching content upgrades. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. 
A good standard value is twice your Rank.') # TODO: Better explain what this does, in terms of real world effect especially. lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers.') cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') @@ -52,7 +52,7 @@ def create_train_interface(): with gr.Row(): raw_text_file = gr.Dropdown(choices=get_dataset('training/datasets', 'txt'), value='None', label='Text File', info='The raw text file to use for training.') ui.create_refresh_button(raw_text_file, lambda : None, lambda : {'choices': get_dataset('training/datasets', 'txt')}, 'refresh-button') - overlap_len = gr.Slider(label='Overlap Length', minimum=0,maximum=512, value=32, step=8, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length above)') + overlap_len = gr.Slider(label='Overlap Length', minimum=0,maximum=512, value=128, step=16, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length above). Setting overlap to exactly half the cutoff length may be ideal.') with gr.Row(): start_button = gr.Button("Start LoRA Training") From b0f05046b307ce484c8fe8300a10e1909d94904d Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Mon, 27 Mar 2023 22:50:37 -0700 Subject: [PATCH 50/80] remove duplicate import --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8400250f..79da715d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,5 @@ rwkv==0.7.1 safetensors==0.3.0 sentencepiece tqdm -peft datasets git+https://github.com/huggingface/transformers From 8579fe51dd09651ce7168d2191f9d741540881a5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 12:59:34 -0300 Subject: [PATCH 51/80] Fix new lines in the HTML tab --- modules/html_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/html_generator.py b/modules/html_generator.py index ff18c913..48d2e02e 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -34,7 +34,7 @@ def convert_to_markdown(string): string = string.replace('\\begin{blockquote}', '> ') string = string.replace('\\end{blockquote}', '') string = re.sub(r"(.)```", r"\1\n```", string) -# string = fix_newlines(string) + string = fix_newlines(string) return markdown.markdown(string, extensions=['fenced_code']) def generate_basic_html(string): From 91aa5b460ed1f330e35b02fd7f5368912ea6526c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:08:38 -0300 Subject: [PATCH 52/80] If both .pt and .safetensors are present, download only safetensors --- download-model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/download-model.py b/download-model.py index 25386e5f..dce7e749 100644 --- a/download-model.py +++ b/download-model.py @@ -100,6 +100,7 @@ def get_download_links_from_huggingface(model, branch): links = [] classifications = [] has_pytorch = False + has_pt = False has_safetensors = False is_lora = False while 
True: @@ -115,7 +116,7 @@ def get_download_links_from_huggingface(model, branch): is_lora = True is_pytorch = re.match("(pytorch|adapter)_model.*\.bin", fname) - is_safetensors = re.match("model.*\.safetensors", fname) + is_safetensors = re.match(".*\.safetensors", fname) is_pt = re.match(".*\.pt", fname) is_tokenizer = re.match("tokenizer.*\.model", fname) is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer @@ -134,6 +135,7 @@ def get_download_links_from_huggingface(model, branch): has_pytorch = True classifications.append('pytorch') elif is_pt: + has_pt = True classifications.append('pt') cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50' @@ -141,9 +143,9 @@ def get_download_links_from_huggingface(model, branch): cursor = cursor.replace(b'=', b'%3D') # If both pytorch and safetensors are available, download safetensors only - if has_pytorch and has_safetensors: + if (has_pytorch or has_pt) and has_safetensors: for i in range(len(classifications)-1, -1, -1): - if classifications[i] == 'pytorch': + if classifications[i] in ['pytorch', 'pt']: links.pop(i) return links, is_lora From 88ad86249d59b2984a99c7366e89728e8a6cc19e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:19:52 -0300 Subject: [PATCH 53/80] Remove unnecessary file --- training/formats/put-trainer-formats-here.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 training/formats/put-trainer-formats-here.txt diff --git a/training/formats/put-trainer-formats-here.txt b/training/formats/put-trainer-formats-here.txt deleted file mode 100644 index e69de29b..00000000 From cac577d99f3ebf864ec8d4701211ec94cf32c4fa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:25:58 -0300 Subject: [PATCH 54/80] Fix interface reloading --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 4d9ee5e8..6023451b 100644 --- a/server.py +++ b/server.py @@ -494,7 +494,7 @@ def create_interface(): shared.gradio['reset_interface'] = gr.Button("Apply and restart the interface", type="primary") shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) - shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...\'; setTimeout(function(){location.reload()},2500)}') + shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'Reloading...
\'; setTimeout(function(){location.reload()},2500); return []}') if shared.args.extensions is not None: extensions_module.create_extensions_block() From c8207d474f9c5365ab5a1c269eb71bff05a31988 Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:38:55 +0300 Subject: [PATCH 55/80] Generalized load_quantized --- modules/GPTQ_loader.py | 54 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index afb5695f..351d658d 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -4,13 +4,48 @@ from pathlib import Path import accelerate import torch +import transformers +from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) -import llama import llama_inference_offload -import opt +from quant import make_quant +from modelutils import find_layers + +def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head']): + config = AutoConfig.from_pretrained(model) + def noop(*args, **kwargs): + pass + torch.nn.init.kaiming_uniform_ = noop + torch.nn.init.uniform_ = noop + torch.nn.init.normal_ = noop + + torch.set_default_dtype(torch.half) + transformers.modeling_utils._init_weights = False + torch.set_default_dtype(torch.half) + model = AutoModelForCausalLM.from_config(config) + torch.set_default_dtype(torch.float) + model = model.eval() + layers = find_layers(model) + for name in exclude_layers: + if name in layers: + del layers[name] + make_quant(model, layers, wbits, groupsize, faster=faster_kernel) + + del layers + + print('Loading model ...') + if checkpoint.endswith('.safetensors'): + from safetensors.torch import load_file as safe_load + model.load_state_dict(safe_load(checkpoint)) + else: + model.load_state_dict(torch.load(checkpoint)) + model.seqlen = 2048 + print('Done.') + + return model def load_quantized(model_name): @@ -20,6 +55,8 @@ def load_quantized(model_name): model_type = 'llama' elif model_name.lower().startswith(('opt', 'galactica')): model_type = 'opt' + elif model_name.lower().startswith(('gpt-j', 'pygmalion-6b')): + model_type = 'gptj' else: print("Can't determine model type from model name. Please specify it manually using --model_type " "argument") @@ -27,15 +64,12 @@ def load_quantized(model_name): else: model_type = shared.args.model_type.lower() - if model_type == 'llama': - if not shared.args.pre_layer: - load_quant = llama.load_quant - else: - load_quant = llama_inference_offload.load_quant - elif model_type == 'opt': - load_quant = opt.load_quant + if model_type == 'llama' and shared.args.pre_layer: + oad_quant = llama_inference_offload.load_quant + elif model_type in ('llama', 'opt', 'gptj'): + load_quant = _load_quant else: - print("Unknown pre-quantized model type specified. Only 'llama' and 'opt' are supported") + print("Unknown pre-quantized model type specified. Only 'llama', 'opt' and 'gptj' are supported") exit() # Now we are going to try to locate the quantized model file. 
From 1c075d8d219b5fd2bfeba1b4bad8f912b22a26da Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:43:50 +0300 Subject: [PATCH 56/80] Fix typo --- modules/GPTQ_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 351d658d..1fdd23c0 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -65,7 +65,7 @@ def load_quantized(model_name): model_type = shared.args.model_type.lower() if model_type == 'llama' and shared.args.pre_layer: - oad_quant = llama_inference_offload.load_quant + load_quant = llama_inference_offload.load_quant elif model_type in ('llama', 'opt', 'gptj'): load_quant = _load_quant else: From d1377c37af2bb29c97d06ec996b5a0a66010d005 Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 20:57:16 +0300 Subject: [PATCH 57/80] Fixes for api server - chat mode and integer temperature --- extensions/api/script.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/extensions/api/script.py b/extensions/api/script.py index bd7c1900..dd48f58f 100644 --- a/extensions/api/script.py +++ b/extensions/api/script.py @@ -43,14 +43,14 @@ class Handler(BaseHTTPRequestHandler): generator = generate_reply( question = prompt, - max_new_tokens = body.get('max_length', 200), + max_new_tokens = int(body.get('max_length', 200)), do_sample=True, - temperature=body.get('temperature', 0.5), - top_p=body.get('top_p', 1), - typical_p=body.get('typical', 1), - repetition_penalty=body.get('rep_pen', 1.1), + temperature=float(body.get('temperature', 0.5)), + top_p=float(body.get('top_p', 1)), + typical_p=float(body.get('typical', 1)), + repetition_penalty=float(body.get('rep_pen', 1.1)), encoder_repetition_penalty=1, - top_k=body.get('top_k', 0), + top_k=int(body.get('top_k', 0)), min_length=0, no_repeat_ngram_size=0, num_beams=1, @@ -62,7 +62,10 @@ class Handler(BaseHTTPRequestHandler): answer = '' for a in generator: - answer = a[0] + if isinstance(a, str): + answer = a + else: + answer = a[0] response = json.dumps({ 'results': [{ From 41ec682834de3e7b79cd8e27aeec98690bc209ac Mon Sep 17 00:00:00 2001 From: Maya Eary Date: Tue, 28 Mar 2023 22:45:38 +0300 Subject: [PATCH 58/80] Disable kernel threshold for gpt-j --- modules/GPTQ_loader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 1fdd23c0..2a9039a3 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -14,7 +14,7 @@ import llama_inference_offload from quant import make_quant from modelutils import find_layers -def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head']): +def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head'], kernel_switch_threshold=128): config = AutoConfig.from_pretrained(model) def noop(*args, **kwargs): pass @@ -32,7 +32,7 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc for name in exclude_layers: if name in layers: del layers[name] - make_quant(model, layers, wbits, groupsize, faster=faster_kernel) + make_quant(model, layers, wbits, groupsize, faster=faster_kernel, kernel_switch_threshold=kernel_switch_threshold) del layers @@ -109,7 +109,8 @@ def load_quantized(model_name): if shared.args.pre_layer: model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, shared.args.pre_layer) else: - model = load_quant(str(path_to_model), 
str(pt_path), shared.args.wbits, shared.args.groupsize) + threshold = False if model_type == 'gptj' else 128 + model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, kernel_switch_threshold=threshold) # accelerate offload (doesn't work properly) if shared.args.gpu_memory: From 0bec15ebcd1571155a54e87b371dc40534864f2e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:34:15 -0300 Subject: [PATCH 59/80] Reorder imports --- modules/GPTQ_loader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 2a9039a3..c99a63f3 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -5,14 +5,15 @@ from pathlib import Path import accelerate import torch import transformers -from transformers import AutoConfig, AutoModelForCausalLM +from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) import llama_inference_offload -from quant import make_quant from modelutils import find_layers +from quant import make_quant + def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=['lm_head'], kernel_switch_threshold=128): config = AutoConfig.from_pretrained(model) From 010b259dde859b5703a6ea4cf2ea6c0aa4f25343 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:46:00 -0300 Subject: [PATCH 60/80] Update documentation --- README.md | 2 +- modules/GPTQ_loader.py | 1 - modules/shared.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f6b1d4f5..ba386852 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ Optionally, you can use the following command-line flags: | `--cpu` | Use the CPU to generate text.| | `--load-in-8bit` | Load the model with 8-bit precision.| | `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. | -| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported. | +| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. | | `--groupsize GROUPSIZE` | GPTQ: Group size. | | `--pre_layer PRE_LAYER` | GPTQ: The number of layers to preload. | | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index c99a63f3..7926d0ab 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -48,7 +48,6 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc return model - def load_quantized(model_name): if not shared.args.model_type: # Try to determine model type from model name diff --git a/modules/shared.py b/modules/shared.py index ac9d750c..5d1b42d4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -84,7 +84,7 @@ parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use -- parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.') parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.') parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 
2, 3, 4 and 8 are supported.') -parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported.') +parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported.') parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.') parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to preload.') From 4d8e10100686a680026f76e9854be90ef279a797 Mon Sep 17 00:00:00 2001 From: Nikita Skakun Date: Tue, 28 Mar 2023 14:24:23 -0700 Subject: [PATCH 61/80] Refactor download process to use multiprocessing The previous implementation used threads to download files in parallel, which could lead to performance issues due to the Global Interpreter Lock (GIL). This commit refactors the download process to use multiprocessing instead, which allows for true parallelism across multiple CPUs. This results in significantly faster downloads, particularly for large models. --- download-model.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/download-model.py b/download-model.py index dce7e749..48ae449e 100644 --- a/download-model.py +++ b/download-model.py @@ -17,13 +17,6 @@ from pathlib import Path import requests import tqdm -parser = argparse.ArgumentParser() -parser.add_argument('MODEL', type=str, default=None, nargs='?') -parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') -parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') -parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') -args = parser.parse_args() - def get_file(args): url = args[0] output_folder = args[1] @@ -150,7 +143,22 @@ def get_download_links_from_huggingface(model, branch): return links, is_lora +def download_files(file_list, output_folder, num_processes=8): + with multiprocessing.Pool(processes=num_processes) as pool: + args = [(url, output_folder, idx+1, len(file_list)) for idx, url in enumerate(file_list)] + for _ in tqdm.tqdm(pool.imap_unordered(get_file, args), total=len(args)): + pass + pool.close() + pool.join() + if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('MODEL', type=str, default=None, nargs='?') + parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') + parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') + parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') + args = parser.parse_args() + model = args.MODEL branch = args.branch if model is None: @@ -179,7 +187,4 @@ if __name__ == '__main__': # Downloading the files print(f"Downloading the model to {output_folder}") - pool = multiprocessing.Pool(processes=args.threads) - results = pool.map(get_file, [[links[i], output_folder, i+1, len(links)] for i in range(len(links))]) - pool.close() - pool.join() + download_files(links, output_folder, num_processes=args.threads) From 304f812c637f5494e6c42d296040f0506d9194a1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 19:20:50 -0300 Subject: [PATCH 62/80] Gracefully handle CUDA out of memory errors with streaming --- modules/callbacks.py | 5 +++++ 1 file changed, 5 insertions(+) diff 
--git a/modules/callbacks.py b/modules/callbacks.py index d85f406d..aa92f9cb 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -1,4 +1,5 @@ import gc +import traceback from queue import Queue from threading import Thread @@ -63,6 +64,10 @@ class Iteratorize: ret = self.mfunc(callback=_callback, **self.kwargs) except ValueError: pass + except: + traceback.print_exc() + pass + clear_torch_cache() self.q.put(self.sentinel) if self.c_callback: From ff515ec2fe693cee7ea1d86d5e3f5bf0397aca2f Mon Sep 17 00:00:00 2001 From: Nikita Skakun Date: Tue, 28 Mar 2023 18:29:20 -0700 Subject: [PATCH 63/80] Improve progress bar visual style This commit reverts the performance improvements of the previous commit in favor of an improved visual style for the multithreaded progress bars. The style of the progress bars has been modified so that they take up the same amount of space and stay aligned. --- download-model.py | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/download-model.py b/download-model.py index 48ae449e..2954f4b1 100644 --- a/download-model.py +++ b/download-model.py @@ -16,23 +16,17 @@ from pathlib import Path import requests import tqdm +from tqdm.contrib.concurrent import thread_map -def get_file(url, output_folder): r = requests.get(url, stream=True) - with open(output_folder / Path(url.split('/')[-1]), 'wb') as f: + with open(output_folder / Path(url.rsplit('/', 1)[1]), 'wb') as f: total_size = int(r.headers.get('content-length', 0)) block_size = 1024 - t = tqdm.tqdm(total=total_size, unit='iB', unit_scale=True) - for data in r.iter_content(block_size): - t.update(len(data)) - f.write(data) - t.close() + with tqdm.tqdm(total=total_size, unit='iB', unit_scale=True, bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}') as t: + for data in r.iter_content(block_size): + t.update(len(data)) + f.write(data) def sanitize_branch_name(branch_name): pattern = re.compile(r"^[a-zA-Z0-9._-]+$") @@ -143,13 +137,8 @@ def get_download_links_from_huggingface(model, branch): return links, is_lora -def download_files(file_list, output_folder, num_processes=8): - with multiprocessing.Pool(processes=num_processes) as pool: - args = [(url, output_folder, idx+1, len(file_list)) for idx, url in enumerate(file_list)] - for _ in tqdm.tqdm(pool.imap_unordered(get_file, args), total=len(args)): - pass - pool.close() - pool.join() +def download_files(file_list, output_folder, num_threads=8): + thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads, verbose=False) if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -187,4 +176,4 @@ if __name__ == '__main__': # Downloading the files print(f"Downloading the model to {output_folder}") - download_files(links, output_folder, num_processes=args.threads) + download_files(links, output_folder, args.threads) From aaa218a10216483b48cec068d73a1f891efb55ec Mon Sep 17 00:00:00 2001 From: Nikita Skakun Date: Tue, 28 Mar 2023 18:32:49 -0700 Subject: [PATCH 64/80] Remove unused import. 
--- download-model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/download-model.py b/download-model.py index 2954f4b1..a2d3a6d6 100644 --- a/download-model.py +++ b/download-model.py @@ -9,7 +9,6 @@ python download-model.py facebook/opt-1.3b import argparse import base64 import json -import multiprocessing import re import sys from pathlib import Path From 1edfb9677840b03ce321a450aed87961af24a361 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:27:02 -0300 Subject: [PATCH 65/80] Fix loading extensions from within the interface --- modules/extensions.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/extensions.py b/modules/extensions.py index c3cf4de4..fe6a3945 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -7,7 +7,7 @@ import modules.shared as shared state = {} available_extensions = [] -setup_called = False +setup_called = set() def load_extensions(): global state @@ -53,13 +53,12 @@ def create_extensions_block(): should_display_ui = False # Running setup function - if not setup_called: - for extension, name in iterator(): - if hasattr(extension, "setup"): - extension.setup() - if hasattr(extension, "ui"): - should_display_ui = True - setup_called = True + for extension, name in iterator(): + if hasattr(extension, "ui"): + should_display_ui = True + if extension not in setup_called and hasattr(extension, "setup"): + setup_called.add(extension) + extension.setup() # Creating the extension ui elements if should_display_ui: From c2a863f87deee8b9a314e3c58d93b6b2703cf0d9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:11:51 -0300 Subject: [PATCH 66/80] Mention the updated one-click installer --- README.md | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index ba386852..241d0e03 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,28 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Installation -The recommended installation methods are the following: +### One-click installers + +[oobabooga-windows.zip](https://github.com/oobabooga/text-generation-webui/releases/download/installers/oobabooga-windows.zip) + +Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. + +* To download a model, double click on "download-model" +* To start the web UI, double click on "start-webui" + +Source codes: https://github.com/oobabooga/one-click-installers + +> **Note** +> +> Thanks to [@jllllll](https://github.com/jllllll) and [@ClayShoaf](https://github.com/ClayShoaf), the Windows 1-click installer now sets up 8-bit and 4-bit requirements out of the box. No additional installation steps are necessary. + +> **Note** +> +> There is no need to run the installer as admin. + +### Manual installation using Conda + +These are the recommended installation methods: * Linux and MacOS: using conda natively. * Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). 
@@ -84,24 +105,8 @@ pip install -r requirements.txt > > For bitsandbytes and `--load-in-8bit` to work on Linux/WSL, this dirty fix is currently necessary: https://github.com/oobabooga/text-generation-webui/issues/400#issuecomment-1474876859 -### Alternative: one-click installers -[oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) - -[oobabooga-linux.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-linux.zip) - -Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. - -* To download a model, double click on "download-model" -* To start the web UI, double click on "start-webui" - -Source codes: https://github.com/oobabooga/one-click-installers - -> **Note** -> -> To get 8-bit and 4-bit models working in your 1-click Windows installation, you can use the [one-click-bandaid](https://github.com/ClayShoaf/oobabooga-one-click-bandaid). - -### Alternative: native Windows installation +### Alternative: manual Windows installation As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). From 5d0b83c341804bcdffe73d8876468012a2edc78b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:22:19 -0300 Subject: [PATCH 67/80] Update README.md --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 241d0e03..965c9d15 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,9 @@ Source codes: https://github.com/oobabooga/one-click-installers ### Manual installation using Conda -These are the recommended installation methods: +Recommended if you have some experience with the command-line. -* Linux and MacOS: using conda natively. -* Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). +On Windows, I additionally recommend carrying out the installation on WSL instead of the base system: [WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html From 3b4447a4fe2ef7c99322a626b750ea1aa43083e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:24:11 -0300 Subject: [PATCH 68/80] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 965c9d15..87367877 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ Recommended if you have some experience with the command-line. On Windows, I additionally recommend carrying out the installation on WSL instead of the base system: [WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). +#### 0. 
Install Conda + Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html On Linux or WSL, it can be automatically installed with these two commands: From 41b58bc47e84458b880386e57d0d17e2bfe6f76c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:02:29 -0300 Subject: [PATCH 69/80] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87367877..97f26ccb 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ pip install -r requirements.txt ### Alternative: manual Windows installation -As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). +As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Windows installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-installation-guide). ### Alternative: Docker From a6d03730639463eb261b40ec5dad380f5df791ed Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:48:17 -0300 Subject: [PATCH 70/80] Fix training dataset loading #636 --- modules/training.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/training.py b/modules/training.py index 7bcecb38..913866d9 100644 --- a/modules/training.py +++ b/modules/training.py @@ -119,7 +119,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int } # == Prep the dataset, format, etc == - if raw_text_file is not None: + if raw_text_file not in ['None', '']: print("Loading raw text file dataset...") with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file: raw_text = file.read() @@ -136,16 +136,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int del text_chunks else: - with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: - format_data: dict[str, str] = json.load(formatFile) - - if dataset is None: + if dataset in ['None', '']: yield "**Missing dataset choice input, cannot continue.**" return - if format is None: + + if format in ['None', '']: yield "**Missing format choice input, cannot continue.**" return + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: + format_data: dict[str, str] = json.load(formatFile) + def generate_prompt(data_point: dict[str, str]): for options, data in format_data.items(): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0): From 58349f44a0924671e65de7cb42764fb846653afe Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:55:34 -0300 Subject: [PATCH 71/80] Handle training exception for unsupported models --- modules/training.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 913866d9..62ba181c 100644 --- a/modules/training.py +++ b/modules/training.py @@ -2,6 +2,7 @@ import json import sys import threading import time +import traceback from pathlib import Path import gradio as gr @@ -184,7 +185,13 @@ def 
do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int bias="none", task_type="CAUSAL_LM" ) - lora_model = get_peft_model(shared.model, config) + + try: + lora_model = get_peft_model(shared.model, config) + except: + yield traceback.format_exc() + return + trainer = transformers.Trainer( model=lora_model, train_dataset=train_data, From 1445ea86f7c2a0c8e3f88337ab15d4e076accc70 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 20:26:44 -0300 Subject: [PATCH 72/80] Add --output and better metadata for downloading models --- download-model.py | 21 +++++++++++++++++---- loras/place-your-loras-here.txt | 0 2 files changed, 17 insertions(+), 4 deletions(-) delete mode 100644 loras/place-your-loras-here.txt diff --git a/download-model.py b/download-model.py index dce7e749..05d9dca4 100644 --- a/download-model.py +++ b/download-model.py @@ -8,6 +8,7 @@ python download-model.py facebook/opt-1.3b import argparse import base64 +import datetime import json import multiprocessing import re @@ -22,6 +23,7 @@ parser.add_argument('MODEL', type=str, default=None, nargs='?') parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') +parser.add_argument('--output', type=str, default=None, help='The folder where the model should be saved.') args = parser.parse_args() def get_file(args): @@ -169,13 +171,24 @@ if __name__ == '__main__': sys.exit() links, is_lora = get_download_links_from_huggingface(model, branch) - base_folder = 'models' if not is_lora else 'loras' - if branch != 'main': - output_folder = Path(base_folder) / (model.split('/')[-1] + f'_{branch}') + + if args.output is not None: + base_folder = args.output else: - output_folder = Path(base_folder) / model.split('/')[-1] + base_folder = 'models' if not is_lora else 'loras' + + output_folder = f"{'_'.join(model.split('/')[-2:])}" + if branch != 'main': + output_folder += f'_{branch}' + + # Creating the folder and writing the metadata + output_folder = Path(base_folder) / output_folder if not output_folder.exists(): output_folder.mkdir() + with open(output_folder / 'huggingface-metadata.txt', 'w') as f: + f.write(f'url: https://huggingface.co/{model}\n') + f.write(f'branch: {branch}\n') + f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n') # Downloading the files print(f"Downloading the model to {output_folder}") diff --git a/loras/place-your-loras-here.txt b/loras/place-your-loras-here.txt deleted file mode 100644 index e69de29b..00000000 From 37754164eb44338e9f9bf7642a49cc6f0a9802b9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 20:47:36 -0300 Subject: [PATCH 73/80] Move argparse --- download-model.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/download-model.py b/download-model.py index f67055ba..dc6f3a9d 100644 --- a/download-model.py +++ b/download-model.py @@ -149,13 +149,6 @@ def download_files(file_list, output_folder, num_threads=8): thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads, verbose=False) if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('MODEL', type=str, default=None, nargs='?') - parser.add_argument('--branch', 
type=str, default='main', help='Name of the Git branch to download from.') - parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') - parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') - args = parser.parse_args() - model = args.MODEL branch = args.branch if model is None: From 0345e042492d25d907b592bdce6cee47eebd2d0c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 21:17:48 -0300 Subject: [PATCH 74/80] Fix "Unknown argument(s): {'verbose': False}" --- download-model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/download-model.py b/download-model.py index dc6f3a9d..6f0751d8 100644 --- a/download-model.py +++ b/download-model.py @@ -146,7 +146,7 @@ def get_download_links_from_huggingface(model, branch): return links, is_lora def download_files(file_list, output_folder, num_threads=8): - thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads, verbose=False) + thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads) if __name__ == '__main__': model = args.MODEL @@ -189,3 +189,4 @@ if __name__ == '__main__': # Downloading the files print(f"Downloading the model to {output_folder}") download_files(links, output_folder, args.threads) + print() From 1cb9246160bafca2599b20b69e7c4e9afff410e6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 21:47:36 -0300 Subject: [PATCH 75/80] Adapt to the new model names --- modules/GPTQ_loader.py | 7 ++++--- modules/models.py | 4 ++-- modules/shared.py | 4 ---- modules/text_generation.py | 6 +++--- server.py | 13 ++++++------- settings-template.json | 9 +++------ 6 files changed, 18 insertions(+), 25 deletions(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index 7926d0ab..e7877de7 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -51,11 +51,12 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc def load_quantized(model_name): if not shared.args.model_type: # Try to determine model type from model name - if model_name.lower().startswith(('llama', 'alpaca')): + name = model_name.lower() + if any((k in name for k in ['llama', 'alpaca'])): model_type = 'llama' - elif model_name.lower().startswith(('opt', 'galactica')): + elif any((k in name for k in ['opt-', 'galactica'])): model_type = 'opt' - elif model_name.lower().startswith(('gpt-j', 'pygmalion-6b')): + elif any((k in name for k in ['gpt-j', 'pygmalion-6b'])): model_type = 'gptj' else: print("Can't determine model type from model name. 
Please specify it manually using --model_type " diff --git a/modules/models.py b/modules/models.py index a6839318..b19507db 100644 --- a/modules/models.py +++ b/modules/models.py @@ -41,7 +41,7 @@ def load_model(model_name): print(f"Loading {model_name}...") t0 = time.time() - shared.is_RWKV = model_name.lower().startswith('rwkv-') + shared.is_RWKV = 'rwkv-' in model_name.lower() # Default settings if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.wbits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): @@ -159,7 +159,7 @@ def load_model(model_name): model = AutoModelForCausalLM.from_pretrained(checkpoint, **params) # Loading the tokenizer - if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): + if any((k in shared.model_name.lower() for k in ['gpt4chan', 'gpt-4chan'])) and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists(): tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/")) else: tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}/")) diff --git a/modules/shared.py b/modules/shared.py index 5d1b42d4..8bbf3b69 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -37,10 +37,6 @@ settings = { 'chat_generation_attempts': 1, 'chat_generation_attempts_min': 1, 'chat_generation_attempts_max': 5, - 'name1_pygmalion': 'You', - 'name2_pygmalion': 'Kawaii', - 'context_pygmalion': "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - 'stop_at_newline_pygmalion': False, 'default_extensions': [], 'chat_default_extensions': ["gallery"], 'presets': { diff --git a/modules/text_generation.py b/modules/text_generation.py index 20a07ca3..7b5fcd6a 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -42,7 +42,7 @@ def encode(prompt, tokens_to_generate=0, add_special_tokens=True): def decode(output_ids): # Open Assistant relies on special tokens like <|endoftext|> - if re.match('(oasst|galactica)-*', shared.model_name.lower()): + if re.match('.*(oasst|galactica)-*', shared.model_name.lower()): return shared.tokenizer.decode(output_ids, skip_special_tokens=False) else: reply = shared.tokenizer.decode(output_ids, skip_special_tokens=True) @@ -77,10 +77,10 @@ def fix_galactica(s): def formatted_outputs(reply, model_name): if not (shared.args.chat or shared.args.cai_chat): - if model_name.lower().startswith('galactica'): + if 'galactica' in model_name.lower(): reply = fix_galactica(reply) return reply, reply, generate_basic_html(reply) - elif model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')): + elif any((k in shared.model_name.lower() for k in ['gpt4chan', 'gpt-4chan'])): reply = fix_gpt4chan(reply) return reply, 'Only applicable for GALACTICA models.', generate_4chan_html(reply) else: diff --git a/server.py b/server.py index 6023451b..62a7ebfb 100644 --- a/server.py +++ b/server.py @@ -282,7 +282,6 @@ else: default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' -suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' def 
create_interface(): @@ -294,7 +293,7 @@ def create_interface(): if shared.args.chat or shared.args.cai_chat: with gr.Tab("Text generation", elem_id="main"): if shared.args.cai_chat: - shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) + shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], shared.character)) else: shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) shared.gradio['textbox'] = gr.Textbox(label='Input') @@ -314,9 +313,9 @@ def create_interface(): shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) with gr.Tab("Character", elem_id="chat-settings"): - shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') - shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') - shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') + shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') + shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Bot\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=5, label='Context') with gr.Row(): shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu') ui.create_refresh_button(shared.gradio['character_menu'], lambda : None, lambda : {'choices': get_available_characters()}, 'refresh-button') @@ -354,7 +353,7 @@ def create_interface(): shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) with gr.Column(): shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') - shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') + shared.gradio['check'] = gr.Checkbox(value=shared.settings['stop_at_newline'], label='Stop generating at new line character?') create_settings_menus(default_preset) @@ -401,7 +400,7 @@ def create_interface(): shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js+ui.chat_js}}}") - shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) + shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings['name1'], shared.settings['name2']), None, None) shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) elif shared.args.notebook: diff --git a/settings-template.json b/settings-template.json index 79fd5023..2a2aaed9 100644 --- a/settings-template.json +++ b/settings-template.json @@ -12,10 +12,6 @@ "chat_generation_attempts": 1, "chat_generation_attempts_min": 1, 
"chat_generation_attempts_max": 5, - "name1_pygmalion": "You", - "name2_pygmalion": "Kawaii", - "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - "stop_at_newline_pygmalion": false, "default_extensions": [], "chat_default_extensions": [ "gallery" @@ -29,10 +25,11 @@ "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>", + "alpaca-*": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" }, "lora_prompts": { "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + "(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" } } From 55755e27b9ddb98e48d5b2dba08c3fb728d7d680 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:40:04 -0300 Subject: [PATCH 76/80] Don't hardcode prompts in the settings dict/json --- modules/shared.py | 17 ++++++++--------- prompts/GPT-4chan.txt | 6 ++++++ server.py | 16 ++++++++-------- settings-template.json | 17 ++++++++--------- 4 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 prompts/GPT-4chan.txt diff --git a/modules/shared.py b/modules/shared.py index 8bbf3b69..348defa3 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -41,19 +41,18 @@ settings = { 'chat_default_extensions': ["gallery"], 'presets': { 'default': 'NovelAI-Sphinx Moth', - 'pygmalion-*': 'Pygmalion', - 'RWKV-*': 'Naive', + '.*pygmalion': 'Pygmalion', + '.*RWKV': 'Naive', }, 'prompts': { - 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n', - '(rosey|chip|joi)_.*_instruct.*': 'User: \n', - 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>', - 'alpaca-*': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n", + 'default': 'QA', + '.*(gpt4chan|gpt-4chan|4chan)': 'GPT-4chan', + '.*oasst': 'Open Assistant', + '.*alpaca': "Alpaca", }, 'lora_prompts': { - 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - '(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + 'default': 'QA', + '.*(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Alpaca", } } diff --git a/prompts/GPT-4chan.txt b/prompts/GPT-4chan.txt new file mode 100644 index 00000000..1bc8c7f4 --- /dev/null +++ b/prompts/GPT-4chan.txt @@ -0,0 +1,6 @@ +----- +--- 865467536 +Hello, AI frens! +How are you doing on this fine day? +--- 865467537 + diff --git a/server.py b/server.py index 62a7ebfb..50af759e 100644 --- a/server.py +++ b/server.py @@ -73,9 +73,7 @@ def load_model_wrapper(selected_model): def load_lora_wrapper(selected_lora): add_lora_to_model(selected_lora) - default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] - - return selected_lora, default_text + return selected_lora def load_preset_values(preset_menu, return_dict=False): generate_params = { @@ -141,7 +139,10 @@ def load_prompt(fname): return '' else: with open(Path(f'prompts/{fname}.txt'), 'r', encoding='utf-8') as f: - return f.read() + text = f.read() + if text[-1] == '\n': + text = text[:-1] + return text def create_prompt_menus(): with gr.Row(): @@ -212,7 +213,7 @@ def create_settings_menus(default_preset): shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) - shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) + shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -277,11 +278,10 @@ if shared.args.lora: # Default UI settings default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] if shared.lora_name != "None": - default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] + default_text = load_prompt(shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')]) else: - default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] + default_text = load_prompt(shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')]) title ='Text generation web UI' -description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' def create_interface(): diff --git a/settings-template.json 
b/settings-template.json index 2a2aaed9..da767cda 100644 --- a/settings-template.json +++ b/settings-template.json @@ -18,18 +18,17 @@ ], "presets": { "default": "NovelAI-Sphinx Moth", - "pygmalion-*": "Pygmalion", - "RWKV-*": "Naive" + ".*pygmalion": "Pygmalion", + ".*RWKV": "Naive" }, "prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", - "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>", - "alpaca-*": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + "default": "QA", + ".*(gpt4chan|gpt-4chan|4chan)": "GPT-4chan", + ".*oasst": "Open Assistant", + ".*alpaca": "Alpaca" }, "lora_prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + "default": "QA", + ".*(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)": "Alpaca" } } From a21e5807821a8aee7ecddfdbcc9f8e3bde8c83a3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:50:58 -0300 Subject: [PATCH 77/80] Move an import --- modules/LoRA.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 1d36c333..8c30e609 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -1,6 +1,7 @@ from pathlib import Path import torch +from peft import PeftModel import modules.shared as shared from modules.models import load_model @@ -14,8 +15,6 @@ def reload_model(): def add_lora_to_model(lora_name): - from peft import PeftModel - # If a LoRA had been previously loaded, or if we want # to unload a LoRA, reload the model if shared.lora_name not in ['None', ''] or lora_name in ['None', '']: From 131753fcf5e3c3b22a8e2e0ac67fb44c3e1dfd4e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 23:28:16 -0300 Subject: [PATCH 78/80] Save the sha256sum of downloaded models --- download-model.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/download-model.py b/download-model.py index 6f0751d8..7e5f61b2 100644 --- a/download-model.py +++ b/download-model.py @@ -93,6 +93,7 @@ def get_download_links_from_huggingface(model, branch): cursor = b"" links = [] + sha256 = [] classifications = [] has_pytorch = False has_pt = False @@ -117,6 +118,8 @@ def get_download_links_from_huggingface(model, branch): is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer if any((is_pytorch, is_safetensors, is_pt, is_tokenizer, is_text)): + if 'lfs' in dict[i]: + sha256.append([fname, dict[i]['lfs']['oid']]) if is_text: links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}") classifications.append('text') @@ -143,7 +146,7 @@ def get_download_links_from_huggingface(model, branch): if classifications[i] in ['pytorch', 'pt']: links.pop(i) - return links, is_lora + return links, sha256, is_lora def 
download_files(file_list, output_folder, num_threads=8): thread_map(lambda url: get_file(url, output_folder), file_list, max_workers=num_threads) @@ -166,7 +169,7 @@ if __name__ == '__main__': print(f"Error: {err_branch}") sys.exit() - links, is_lora = get_download_links_from_huggingface(model, branch) + links, sha256, is_lora = get_download_links_from_huggingface(model, branch) if args.output is not None: base_folder = args.output @@ -185,6 +188,11 @@ if __name__ == '__main__': f.write(f'url: https://huggingface.co/{model}\n') f.write(f'branch: {branch}\n') f.write(f'download date: {str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))}\n') + sha256_str = '' + for i in range(len(sha256)): + sha256_str += f' {sha256[i][1]} {sha256[i][0]}\n' + if sha256_str != '': + f.write(f'sha256sum:\n{sha256_str}') # Downloading the files print(f"Downloading the model to {output_folder}") From bd65940a48c8cb25dfdda8f64aa3bfc3bdf1b10b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 30 Mar 2023 00:43:49 -0300 Subject: [PATCH 79/80] Increase --chat box height --- css/chat.css | 3 +++ server.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/css/chat.css b/css/chat.css index 1e703530..dee76beb 100644 --- a/css/chat.css +++ b/css/chat.css @@ -29,3 +29,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding: 0; } +#gradio-chatbot { + height: 66.67vh; +} diff --git a/server.py b/server.py index 50af759e..27223f84 100644 --- a/server.py +++ b/server.py @@ -295,7 +295,7 @@ def create_interface(): if shared.args.cai_chat: shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], shared.character)) else: - shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) + shared.gradio['display'] = gr.Chatbot(value=shared.history['visible'], elem_id="gradio-chatbot") shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): shared.gradio['Generate'] = gr.Button('Generate') From f0fdab08d31d4c00e2e15c5871413ce847ca842b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 30 Mar 2023 01:02:11 -0300 Subject: [PATCH 80/80] Increase --chat height --- css/chat.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/chat.css b/css/chat.css index dee76beb..c8a9d70a 100644 --- a/css/chat.css +++ b/css/chat.css @@ -32,3 +32,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #gradio-chatbot { height: 66.67vh; } + +.wrap.svelte-6roggh.svelte-6roggh { + max-height: 92.5%; +}
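The model-name matching introduced in patches 75 and 76 above relies on re.match, which only anchors at the beginning of the string; that is why keys such as oasst-* and pygmalion-* gain a leading .* (becoming .*oasst, .*pygmalion) once the old prefix checks are replaced by substring-style matching. Below is a minimal, self-contained sketch of that lookup; the dictionary mirrors the shape of shared.settings['prompts'], but its entries are only illustrative, not the project's actual defaults.

import re

# Regex-keyed lookup in the style of patches 75/76 (illustrative entries only).
prompts = {
    'default': 'QA',
    '.*(gpt4chan|gpt-4chan|4chan)': 'GPT-4chan',
    '.*oasst': 'Open Assistant',
    '.*alpaca': 'Alpaca',
}

def resolve_prompt(model_name):
    # re.match() anchors at the start of the string, so a key needs a
    # leading '.*' to match a substring anywhere in the model name.
    key = next((k for k in prompts if re.match(k.lower(), model_name.lower())), 'default')
    return prompts[key]

print(resolve_prompt('llama-30b'))           # QA (nothing matches, falls back to 'default')
print(resolve_prompt('gpt4-x-alpaca-13b'))   # Alpaca
print(resolve_prompt('oasst-sft-1-pythia'))  # Open Assistant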