Mirror of https://github.com/oobabooga/text-generation-webui.git
Synced 2024-11-22 08:07:56 +01:00
Fix ExLlama truncation

commit ef17da70af
parent ee964bcce9
@@ -111,7 +111,7 @@ class ExllamaModel:
             self.generator.end_beam_search()
 
             # Tokenizing the input
-            ids = self.generator.tokenizer.encode(prompt)
+            ids = self.generator.tokenizer.encode(prompt, max_seq_len=self.model.config.max_seq_len)
             ids = ids[:, -get_max_prompt_length(state):]
             if state['auto_max_new_tokens']:
                 max_new_tokens = state['truncation_length'] - ids.shape[-1]
@@ -141,7 +141,7 @@ class ExllamaModel:
             alpha = state['guidance_scale']
             prompts = [prompt, state['negative_prompt'] or '']
 
-            ids, mask = self.tokenizer.encode(prompts, return_mask=True)
+            ids, mask = self.tokenizer.encode(prompts, return_mask=True, max_seq_len=self.model.config.max_seq_len)
             if state['auto_max_new_tokens']:
                 max_new_tokens = state['truncation_length'] - ids[0].shape[-1]
             else:
@@ -181,7 +181,7 @@ class ExllamaModel:
         return output
 
     def encode(self, string, **kwargs):
-        return self.tokenizer.encode(string)
+        return self.tokenizer.encode(string, max_seq_len=self.model.config.max_seq_len)
 
     def decode(self, string, **kwargs):
         return self.tokenizer.decode(string)[0]
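For context, a minimal sketch of what passing max_seq_len to encode() buys: truncation happens at tokenization time, so callers can no longer hand the model more ids than its context window holds. This is an illustration under stated assumptions, not exllama's implementation: toy_encode and its whitespace "tokenizer" are made up, and the sketch keeps the tail of an overlong prompt, matching how the surrounding webui code keeps the most recent tokens via ids[:, -get_max_prompt_length(state):].

# Toy illustration only: toy_encode is hypothetical, not part of exllama.
def toy_encode(text: str, max_seq_len: int | None = None) -> list[int]:
    """Map each whitespace-separated word to a fake token id.

    When max_seq_len is given, keep only the last max_seq_len ids,
    mirroring how the webui keeps the tail of an overlong prompt.
    """
    ids = [hash(word) % 50_000 for word in text.split()]
    if max_seq_len is not None:
        ids = ids[-max_seq_len:]  # drop the oldest tokens first
    return ids


prompt = "one two three four five six seven eight"
print(len(toy_encode(prompt)))                 # 8 -> untruncated
print(len(toy_encode(prompt, max_seq_len=4)))  # 4 -> clipped to fit the context

The design point of the commit is that all three encode() call sites (streaming generation, CFG generation, and the standalone encode() helper) now apply the same model-defined limit, so no path can feed an unclipped prompt to the model.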