Fix missing initial space for LlamaTokenizer

oobabooga 2023-04-25 22:47:23 -03:00
parent 92cdb4f22b
commit 15940e762e


@@ -69,6 +69,11 @@ def get_reply_from_output_ids(output_ids, input_ids, original_question, state):
     else:
         new_tokens = len(output_ids) - len(input_ids[0])
         reply = decode(output_ids[-new_tokens:], state['skip_special_tokens'])
+
+        if type(shared.tokenizer) is transformers.LlamaTokenizer:
+            if len(original_question) > 0 and original_question[-1] not in [' ', '\n']:
+                reply = ' ' + reply
+
     if not shared.is_chat():
         reply = original_question + apply_extensions('output', reply)
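
For context: LlamaTokenizer is SentencePiece-based, and decoding only the newly generated slice of token IDs drops the leading space that SentencePiece stores as a word-boundary marker, so the decoded reply gets glued onto the prompt. A minimal sketch of the behavior and the workaround, not part of the commit; the checkpoint name below is illustrative and assumes a Llama tokenizer is available locally:

# Sketch of the missing-space behavior that this commit works around.
# 'huggyllama/llama-7b' is an illustrative checkpoint, not one the repo mandates.
import transformers

tokenizer = transformers.LlamaTokenizer.from_pretrained('huggyllama/llama-7b')

original_question = 'The quick brown'
# Token IDs for the continuation, as they would come out of generate():
new_ids = tokenizer(' fox jumps', add_special_tokens=False)['input_ids']

# SentencePiece strips the leading space when decoding a partial sequence,
# so the reply typically comes back as 'fox jumps':
reply = tokenizer.decode(new_ids)
print(repr(original_question + reply))  # 'The quick brownfox jumps'

# The commit's fix: restore the space unless the prompt already ends in one.
if type(tokenizer) is transformers.LlamaTokenizer:
    if len(original_question) > 0 and original_question[-1] not in [' ', '\n']:
        reply = ' ' + reply

print(repr(original_question + reply))  # 'The quick brown fox jumps'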