mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 16:17:57 +01:00
llama.cpp: implement ban_eos_token via logits_processor (#2765)
This commit is contained in:
parent
0d9d70ec7e
commit
59e7ecb198
@ -7,14 +7,20 @@ https://abetlen.github.io/llama-cpp-python/
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
from llama_cpp import Llama, LlamaCache
|
from llama_cpp import Llama, LlamaCache, LogitsProcessorList
|
||||||
|
|
||||||
from modules import shared
|
from modules import shared
|
||||||
from modules.callbacks import Iteratorize
|
from modules.callbacks import Iteratorize
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
|
|
||||||
|
|
||||||
|
def ban_eos_logits_processor(eos_token, input_ids, logits):
    """Logits processor that forbids sampling the end-of-sequence token.

    Sets the logit at index ``eos_token`` to negative infinity so the
    sampler can never pick it, then returns the (mutated) logits object.
    ``input_ids`` is accepted to satisfy the llama-cpp-python
    logits-processor call signature but is not used.
    """
    logits[eos_token] = float('-inf')
    return logits
|
||||||
|
|
||||||
|
|
||||||
class LlamaCppModel:
|
class LlamaCppModel:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.initialized = False
|
self.initialized = False
|
||||||
@ -72,7 +78,10 @@ class LlamaCppModel:
|
|||||||
mirostat_mode=int(state['mirostat_mode']),
|
mirostat_mode=int(state['mirostat_mode']),
|
||||||
mirostat_tau=state['mirostat_tau'],
|
mirostat_tau=state['mirostat_tau'],
|
||||||
mirostat_eta=state['mirostat_eta'],
|
mirostat_eta=state['mirostat_eta'],
|
||||||
stream=True
|
stream=True,
|
||||||
|
logits_processor=LogitsProcessorList([
|
||||||
|
partial(ban_eos_logits_processor, self.model.token_eos()),
|
||||||
|
]) if state['ban_eos_token'] else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
output = ""
|
output = ""
|
||||||
|
Loading…
Reference in New Issue
Block a user