mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-29 19:09:32 +01:00
Add Tensorboard/Weights and biases integration for training (#2624)
This commit is contained in:
parent
5d513eea22
commit
3f19e94c93
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
os.environ["WANDB_MODE"] = "offline"
|
os.environ["WANDB_MODE"] = "offline"
|
||||||
os.environ["WANDB_DISABLED"] = "true"
|
# os.environ["WANDB_DISABLED"] = "true"
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
@ -60,7 +60,7 @@ train_log = {}
|
|||||||
train_template = {}
|
train_template = {}
|
||||||
|
|
||||||
WANT_INTERRUPT = False
|
WANT_INTERRUPT = False
|
||||||
PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit", "warmup_steps", "optimizer", "hard_cut_string", "train_only_after", "stop_at_loss"]
|
PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit", "warmup_steps", "optimizer", "hard_cut_string", "train_only_after", "stop_at_loss", "report_to"]
|
||||||
|
|
||||||
|
|
||||||
def create_train_interface():
|
def create_train_interface():
|
||||||
@ -122,6 +122,8 @@ def create_train_interface():
|
|||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
|
higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
|
||||||
|
with gr.Row():
|
||||||
|
report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
start_button = gr.Button("Start LoRA Training")
|
start_button = gr.Button("Start LoRA Training")
|
||||||
@ -152,7 +154,8 @@ def create_train_interface():
|
|||||||
refresh_table = gr.Button('Refresh the table', elem_classes="small-button")
|
refresh_table = gr.Button('Refresh the table', elem_classes="small-button")
|
||||||
|
|
||||||
# Training events
|
# Training events
|
||||||
all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, higher_rank_limit, warmup_steps, optimizer, hard_cut_string, train_only_after, stop_at_loss]
|
all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, higher_rank_limit, warmup_steps, optimizer, hard_cut_string, train_only_after, stop_at_loss, report_to]
|
||||||
|
|
||||||
copy_from.change(do_copy_params, [copy_from] + all_params, all_params)
|
copy_from.change(do_copy_params, [copy_from] + all_params, all_params)
|
||||||
start_button.click(do_train, all_params, output)
|
start_button.click(do_train, all_params, output)
|
||||||
stop_button.click(do_interrupt, None, None, queue=False)
|
stop_button.click(do_interrupt, None, None, queue=False)
|
||||||
@ -261,7 +264,7 @@ def calc_trainable_parameters(model):
|
|||||||
return trainable_params, all_param
|
return trainable_params, all_param
|
||||||
|
|
||||||
|
|
||||||
def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float):
|
def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, report_to: str):
|
||||||
|
|
||||||
if shared.args.monkey_patch:
|
if shared.args.monkey_patch:
|
||||||
from monkeypatch.peft_tuners_lora_monkey_patch import (
|
from monkeypatch.peft_tuners_lora_monkey_patch import (
|
||||||
@ -534,7 +537,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
|
|||||||
train_dataset=train_data,
|
train_dataset=train_data,
|
||||||
eval_dataset=eval_data,
|
eval_dataset=eval_data,
|
||||||
args=transformers.TrainingArguments(
|
args=transformers.TrainingArguments(
|
||||||
report_to=None,
|
report_to=report_to if report_to != "None" else None,
|
||||||
per_device_train_batch_size=micro_batch_size,
|
per_device_train_batch_size=micro_batch_size,
|
||||||
gradient_accumulation_steps=gradient_accumulation_steps,
|
gradient_accumulation_steps=gradient_accumulation_steps,
|
||||||
warmup_steps=math.ceil(warmup_steps / gradient_accumulation_steps),
|
warmup_steps=math.ceil(warmup_steps / gradient_accumulation_steps),
|
||||||
@ -551,7 +554,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
|
|||||||
load_best_model_at_end=eval_data is not None,
|
load_best_model_at_end=eval_data is not None,
|
||||||
# TODO: Enable multi-device support
|
# TODO: Enable multi-device support
|
||||||
ddp_find_unused_parameters=None,
|
ddp_find_unused_parameters=None,
|
||||||
no_cuda=shared.args.cpu
|
no_cuda=shared.args.cpu,
|
||||||
),
|
),
|
||||||
data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
|
data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
|
||||||
callbacks=list([Callbacks()])
|
callbacks=list([Callbacks()])
|
||||||
|
@ -16,6 +16,8 @@ safetensors==0.3.1
|
|||||||
sentencepiece
|
sentencepiece
|
||||||
tqdm
|
tqdm
|
||||||
scipy
|
scipy
|
||||||
|
tensorboard
|
||||||
|
wandb
|
||||||
transformers==4.30.2
|
transformers==4.30.2
|
||||||
git+https://github.com/huggingface/peft@03eb378eb914fbee709ff7c86ba5b1d033b89524
|
git+https://github.com/huggingface/peft@03eb378eb914fbee709ff7c86ba5b1d033b89524
|
||||||
bitsandbytes==0.40.0; platform_system != "Windows"
|
bitsandbytes==0.40.0; platform_system != "Windows"
|
||||||
|
Loading…
Reference in New Issue
Block a user