Fix missing spaces tokenizer issue (closes #4834)

oobabooga 2023-12-08 05:16:23 -08:00
parent 00aedf9209
commit 181743fd97

@@ -265,9 +265,8 @@ def apply_stopping_strings(reply, all_stop_strings):
 def get_reply_from_output_ids(output_ids, state, starting_from=0):
     reply = decode(output_ids[starting_from:], state['skip_special_tokens'])
-    if type(shared.tokenizer) in [transformers.LlamaTokenizer, transformers.LlamaTokenizerFast] and len(output_ids) > starting_from:
-        if shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from])).startswith('▁'):
-            reply = ' ' + reply
+    if hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from and shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from])).startswith('▁'):
+        reply = ' ' + reply
 
     return reply
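
Background on the fix (a sketch, not part of the commit): SentencePiece-based tokenizers such as LlamaTokenizer mark a word-initial space with the '▁' meta-symbol on the token itself, and decode() strips that space when the token sits at the start of the decoded slice. Since get_reply_from_output_ids() decodes only the tokens from starting_from onward while streaming, the space between chunks was lost. The commit also swaps the hard-coded LlamaTokenizer/LlamaTokenizerFast type check for a duck-typed hasattr check, so any tokenizer exposing convert_ids_to_tokens gets the same treatment. A minimal, self-contained reproduction follows; the model id is an assumption for illustration, and any Llama-style tokenizer should behave the same way:

    # Sketch of the missing-space behavior this commit works around.
    # Assumes a SentencePiece/Llama-style tokenizer; the model id is illustrative.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('openlm-research/open_llama_3b')

    ids = tokenizer.encode('Hello world', add_special_tokens=False)
    print(repr(tokenizer.decode(ids)))       # 'Hello world'
    print(repr(tokenizer.decode(ids[1:])))   # 'world' -- the leading space is gone

    # The fix: the first token of the slice still carries the '▁' marker,
    # so re-add the space that decode() dropped.
    chunk = tokenizer.decode(ids[1:])
    if tokenizer.convert_ids_to_tokens(ids[1]).startswith('\u2581'):  # '▁'
        chunk = ' ' + chunk

    print(repr(chunk))                        # ' world'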