mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-23 01:59:21 +01:00
LoRA trainer improvements part 5 (#1546)
* full dynamic model type support on modern peft * remove shuffle option
This commit is contained in:
parent
65beb51b0b
commit
312cb7dda6
@ -23,21 +23,22 @@ try:
|
||||
from peft.utils.other import \
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING as \
|
||||
model_to_lora_modules
|
||||
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
|
||||
MODEL_CLASSES = {v: k for k, v in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES}
|
||||
except:
|
||||
standard_modules = ["q_proj", "v_proj"]
|
||||
model_to_lora_modules = {"llama": standard_modules, "opt": standard_modules, "gptj": standard_modules, "gpt_neox": ["query_key_value"]}
|
||||
MODEL_CLASSES = {
|
||||
"LlamaForCausalLM": "llama",
|
||||
"OPTForCausalLM": "opt",
|
||||
"GPTJForCausalLM": "gptj",
|
||||
"GPTNeoXForCausalLM": "gpt_neox"
|
||||
}
|
||||
|
||||
WANT_INTERRUPT = False
|
||||
|
||||
PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "do_shuffle", "higher_rank_limit", "warmup_steps", "optimizer"]
|
||||
PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit", "warmup_steps", "optimizer"]
|
||||
|
||||
# Mapping of Python class names to peft IDs
|
||||
MODEL_CLASSES = {
|
||||
"LlamaForCausalLM": "llama",
|
||||
"OPTForCausalLM": "opt",
|
||||
"GPTJForCausalLM": "gptj",
|
||||
"GPTNeoXForCausalLM": "gpt_neox"
|
||||
}
|
||||
|
||||
|
||||
def get_datasets(path: str, ext: str):
|
||||
@ -99,7 +100,6 @@ def create_train_interface():
|
||||
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. Effects of different options are not well documented yet.')
|
||||
|
||||
with gr.Row():
|
||||
do_shuffle = gr.Checkbox(label='Shuffle Dataset', value=True, info='If checked, the dataset will be randomly shuffled. This can help reduce overfitting.')
|
||||
higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
|
||||
|
||||
with gr.Row():
|
||||
@ -129,7 +129,7 @@ def create_train_interface():
|
||||
save_comments = gr.Button('Save comments')
|
||||
|
||||
# Training events
|
||||
all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, do_shuffle, higher_rank_limit, warmup_steps, optimizer]
|
||||
all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, higher_rank_limit, warmup_steps, optimizer]
|
||||
copy_from.change(do_copy_params, [copy_from] + all_params, all_params)
|
||||
start_button.click(do_train, all_params, output)
|
||||
stop_button.click(do_interrupt, None, None, queue=False)
|
||||
@ -192,7 +192,7 @@ def clean_path(base_path: str, path: str):
|
||||
return f'{Path(base_path).absolute()}/{path}'
|
||||
|
||||
|
||||
def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, do_shuffle: bool, higher_rank_limit: bool, warmup_steps: int, optimizer: str):
|
||||
def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, higher_rank_limit: bool, warmup_steps: int, optimizer: str):
|
||||
|
||||
if shared.args.monkey_patch:
|
||||
from monkeypatch.peft_tuners_lora_monkey_patch import \
|
||||
@ -308,11 +308,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
|
||||
else:
|
||||
eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json'))
|
||||
eval_data = eval_data['train'].map(generate_and_tokenize_prompt)
|
||||
if do_shuffle:
|
||||
eval_data = eval_data.shuffle()
|
||||
|
||||
if do_shuffle:
|
||||
train_data = train_data.shuffle()
|
||||
|
||||
# == Start prepping the model itself ==
|
||||
if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'):
|
||||
|
Loading…
Reference in New Issue
Block a user