From b705b4210c273f5b1bf383aabf8af26f09da5c8c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 16 Apr 2023 03:08:37 -0300
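Subject: [PATCH] Minor changes to training.py

In create_train_interface(), move the all_params list and the
copy_from / start_button / stop_button event wiring below the nested
helper definitions (do_copy_params, change_rank_limit), so all event
registrations sit together at the end of the function.

Pass a single component (copy_from, output) or None to the Gradio
event listeners instead of one-element and empty lists, and drop the
empty cancels=[] argument from the stop button handler.

Also adjust blank lines around lora_dropout and model_type.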

---
 modules/training.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/training.py b/modules/training.py
index 409d7102..3546d9ea 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -68,6 +68,7 @@ def create_train_interface():
         # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale.
         lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, higher values like 128 or 256 are good for teaching content upgrades, extremely high values (1024+) are difficult to train but may improve fine-detail learning for large datasets. Higher ranks also require higher VRAM.')
         lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.')
+
         # TODO: Better explain what this does, in terms of real world effect especially.
         lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
         cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.')
@@ -100,9 +101,6 @@ def create_train_interface():
             stop_button = gr.Button("Interrupt")
 
         output = gr.Markdown(value="Ready")
-        all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, do_shuffle, higher_rank_limit]
-        start_button.click(do_train, all_params, [output])
-        stop_button.click(do_interrupt, [], [], cancels=[], queue=False)
 
         def do_copy_params(lora_name: str):
             with open(f"{shared.args.lora_dir}/{clean_path(None, lora_name)}/training_parameters.json", 'r', encoding='utf-8') as formatFile:
@@ -110,12 +108,14 @@ def create_train_interface():
 
             return [params[x] for x in PARAMETERS]
 
-        copy_from.change(do_copy_params, [copy_from], all_params)
-
         def change_rank_limit(use_higher_ranks: bool):
             mult = 2 if use_higher_ranks else 1
             return {"maximum": 1024 * mult, "__type__": "update"}, {"maximum": 2048 * mult, "__type__": "update"}
 
+        all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, do_shuffle, higher_rank_limit]
+        copy_from.change(do_copy_params, copy_from, all_params)
+        start_button.click(do_train, all_params, output)
+        stop_button.click(do_interrupt, None, None, queue=False)
         higher_rank_limit.change(change_rank_limit, [higher_rank_limit], [lora_rank, lora_alpha])
 
 
@@ -144,8 +144,8 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
     yield "Prepping..."
     lora_file_path = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}"
     actual_lr = float(learning_rate)
-
     model_type = type(shared.model).__name__
+
     if model_type in MODEL_CLASSES:
         model_id = MODEL_CLASSES[model_type]
     else: