Automatically strip empty data entries (for better Alpaca dataset compatibility)

This commit is contained in:
Alex "mcmonkey" Goodwin 2023-03-25 12:28:46 -07:00
parent 566898a79a
commit 7bf601107c

View File

@@ -74,7 +74,7 @@ def do_train(loraName: str, microBatchSize: int, batchSize: int, epochs: int, le
} }
def generate_prompt(data_point: dict[str, str]): def generate_prompt(data_point: dict[str, str]):
for options, data in formatData.items(): for options, data in formatData.items():
if set(options.split(',')) == set(data_point.keys()): if set(options.split(',')) == set(x[0] for x in data_point.items() if len(x[1].strip()) > 0):
for key, val in data_point.items(): for key, val in data_point.items():
data = data.replace(f'%{key}%', val) data = data.replace(f'%{key}%', val)
return data return data