From f1ba2196b1a640bd094623120486b847ca59ccf5 Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Sat, 25 Mar 2023 12:57:36 -0700
Subject: [PATCH] make 'model' variables less ambiguous

---
 modules/training.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/modules/training.py b/modules/training.py
index 250093a0..f9f0790f 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -59,15 +59,13 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le
         return "**Missing format choice input, cannot continue.**"
     gradientAccumulationSteps = batchSize // microBatchSize
     actualLR = float(learningRate)
-    model = shared.model
-    tokenizer = shared.tokenizer
-    tokenizer.pad_token = 0
-    tokenizer.padding_side = "left"
+    shared.tokenizer.pad_token = 0
+    shared.tokenizer.padding_side = "left"
     # Prep the dataset, format, etc
     with open(cleanPath('training/formats', f'{format}.json'), 'r') as formatFile:
         formatData: dict[str, str] = json.load(formatFile)
     def tokenize(prompt):
-        result = tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length")
+        result = shared.tokenizer(prompt, truncation=True, max_length=cutoffLen + 1, padding="max_length")
         return {
             "input_ids": result["input_ids"][:-1],
             "attention_mask": result["attention_mask"][:-1],
@@ -90,8 +88,8 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le
         evalData = load_dataset("json", data_files=cleanPath('training/datasets', f'{evalDataset}.json'))
         evalData = evalData['train'].shuffle().map(generate_and_tokenize_prompt)
     # Start prepping the model itself
-    if not hasattr(model, 'lm_head') or hasattr(model.lm_head, 'weight'):
-        model = prepare_model_for_int8_training(model)
+    if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'):
+        prepare_model_for_int8_training(shared.model)
     config = LoraConfig(
         r=loraRank,
         lora_alpha=loraAlpha,
@@ -101,9 +99,9 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le
         bias="none",
         task_type="CAUSAL_LM"
     )
-    model = get_peft_model(model, config)
+    loraModel = get_peft_model(shared.model, config)
     trainer = transformers.Trainer(
-        model=model,
+        model=loraModel,
         train_dataset=train_data,
         eval_dataset=evalData,
         args=transformers.TrainingArguments(
@@ -125,16 +123,16 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le
             # TODO: Enable multi-device support
             ddp_find_unused_parameters=None,
         ),
-        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
+        data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
     )
-    model.config.use_cache = False
-    old_state_dict = model.state_dict
-    model.state_dict = (
+    loraModel.config.use_cache = False
+    old_state_dict = loraModel.state_dict
+    loraModel.state_dict = (
         lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
-    ).__get__(model, type(model))
+    ).__get__(loraModel, type(loraModel))
     if torch.__version__ >= "2" and sys.platform != "win32":
-        model = torch.compile(model)
+        loraModel = torch.compile(loraModel)
     # Actually start and run and save at the end
     trainer.train()
-    model.save_pretrained(loraName)
+    loraModel.save_pretrained(loraName)
     return "Done!"
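
Note (editorial, not part of the patch): the rename separates two objects that the old
code kept reassigning through a single 'model' name: the base network held in
'shared.model', and the PeftModel wrapper returned by get_peft_model(), which is what
the Trainer actually trains, compiles, and saves. Below is a minimal, self-contained
sketch of that split under stated assumptions; the model name, target_modules list,
and save path are illustrative placeholders, not values from this repository.

    # Sketch only: shows the base-model / LoRA-wrapper split the patch makes explicit.
    # "facebook/opt-125m", target_modules, and the save path are assumed for
    # demonstration and are not taken from modules/training.py.
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # plays the role of shared.model
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")          # plays the role of shared.tokenizer

    config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],  # assumed; depends on the model architecture
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # get_peft_model() returns a PeftModel wrapper around the base network.
    # The wrapper is the object a Trainer should receive, and calling
    # save_pretrained() on it writes only the small LoRA adapter weights,
    # not the full base model.
    lora_model = get_peft_model(base_model, config)
    lora_model.print_trainable_parameters()
    lora_model.save_pretrained("loras/example-lora")

Keeping the wrapper under its own name ('loraModel' in the patch) makes it clear which
object is passed to the Trainer, wrapped by torch.compile, and saved at the end, while
'shared.model' continues to refer to the underlying network.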