From a6d03730639463eb261b40ec5dad380f5df791ed Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Mar 2023 11:48:17 -0300 Subject: [PATCH] Fix training dataset loading #636 --- modules/training.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/training.py b/modules/training.py index 7bcecb38..913866d9 100644 --- a/modules/training.py +++ b/modules/training.py @@ -119,7 +119,7 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int } # == Prep the dataset, format, etc == - if raw_text_file is not None: + if raw_text_file not in ['None', '']: print("Loading raw text file dataset...") with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r') as file: raw_text = file.read() @@ -136,16 +136,17 @@ def do_train(lora_name: str, micro_batch_size: int, batch_size: int, epochs: int del text_chunks else: - with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: - format_data: dict[str, str] = json.load(formatFile) - - if dataset is None: + if dataset in ['None', '']: yield "**Missing dataset choice input, cannot continue.**" return - if format is None: + + if format in ['None', '']: yield "**Missing format choice input, cannot continue.**" return + with open(clean_path('training/formats', f'{format}.json'), 'r') as formatFile: + format_data: dict[str, str] = json.load(formatFile) + def generate_prompt(data_point: dict[str, str]): for options, data in format_data.items(): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0):