Fix regex bug when loading character JSON files whose names contain regex special characters

This commit is contained in:
oobabooga 2023-02-20 19:38:19 -03:00
parent f867285e3d
commit 444cd69c67

View File

@@ -733,7 +733,7 @@ def tokenize_dialogue(dialogue, name1, name2):
     dialogue = re.sub('<start>', '', dialogue)
     dialogue = re.sub('(\n|^)[Aa]non:', '\\1You:', dialogue)
     dialogue = re.sub('(\n|^)\[CHARACTER\]:', f'\\g<1>{name2}:', dialogue)
-    idx = [m.start() for m in re.finditer(f"(^|\n)({name1}|{name2}):", dialogue)]
+    idx = [m.start() for m in re.finditer(f"(^|\n)({re.escape(name1)}|{re.escape(name2)}):", dialogue)]
     if len(idx) == 0:
         return _history