Fix a regex issue in tokenize_dialogue.

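The existing replacement template fails when a character name starts with a
digit, for example 9S or 2B: "\1" followed directly by such a name is parsed
by re.sub as a multi-digit group reference (e.g. "\19S" is read as group 19,
which does not exist). Writing the reference as "\g<1>" keeps it unambiguous.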
This commit is contained in:
Martin J 2023-02-05 07:42:57 +01:00
parent 2fe235738e
commit 06a4664805

View File

@@ -532,7 +532,7 @@ if args.chat or args.cai_chat:
dialogue = re.sub('<START>', '', dialogue)
dialogue = re.sub('<start>', '', dialogue)
dialogue = re.sub('(\n|^)[Aa]non:', '\\1You:', dialogue)
dialogue = re.sub('(\n|^)\[CHARACTER\]:', f'\\1{name2}:', dialogue)
dialogue = re.sub('(\n|^)\[CHARACTER\]:', f'\\g<1>{name2}:', dialogue)
idx = [m.start() for m in re.finditer(f"(^|\n)({name1}|{name2}):", dialogue)]
if len(idx) == 0:
return _history