Add back .replace('\r', '')

This commit is contained in:
oobabooga 2023-07-12 09:52:20 -07:00
parent 987d0fe023
commit 30f37530d5

View File

@@ -369,12 +369,12 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
for file_path in file_paths: for file_path in file_paths:
if file_path.is_file(): if file_path.is_file():
with file_path.open('r', encoding='utf-8') as file: with file_path.open('r', encoding='utf-8') as file:
raw_text += file.read() raw_text += file.read().replace('\r', '')
logger.info(f"Loaded training file: {file_path.name}") logger.info(f"Loaded training file: {file_path.name}")
else: else:
with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file: with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
raw_text = file.read() raw_text = file.read().replace('\r', '')
cut_string = hard_cut_string.replace('\\n', '\n') cut_string = hard_cut_string.replace('\\n', '\n')
out_tokens = [] out_tokens = []