Reorder some functions

2024-11-22 08:07:56 +01:00 · 2023-09-19 13:13:03 -07:00 · 2023-09-19 13:13:03 -07:00 · 13ac55fa18
commit 13ac55fa18
parent e2fddd9584
4 changed files with 33 additions and 34 deletions
--- a/modules/exllama.py
+++ b/modules/exllama.py
@@ -85,6 +85,22 @@ class ExllamaModel:
        result.generator = generator
        return result, result

+    def encode(self, string, **kwargs):
+        return self.tokenizer.encode(string, max_seq_len=self.model.config.max_seq_len, add_bos=True)
+
+    def decode(self, ids, **kwargs):
+        if isinstance(ids, list):
+            ids = torch.tensor([ids])
+        elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
+            ids = ids.view(1, -1)
+
+        return self.tokenizer.decode(ids)[0]
+
+    def get_logits(self, token_ids, **kwargs):
+        self.cache.current_seq_len = 0
+        self.model.forward(token_ids[:, :-1], self.cache, input_mask=None, preprocess_only=True)
+        return self.model.forward(token_ids[:, -1:], self.cache, **kwargs).float().cpu()
+
    def generate_with_streaming(self, prompt, state):

        # The cache batch size must be 2 for CFG and 1 otherwise
@@ -200,19 +216,3 @@ class ExllamaModel:
            pass

        return output
-
-    def encode(self, string, **kwargs):
-        return self.tokenizer.encode(string, max_seq_len=self.model.config.max_seq_len, add_bos=True)
-
-    def decode(self, ids, **kwargs):
-        if isinstance(ids, list):
-            ids = torch.tensor([ids])
-        elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
-            ids = ids.view(1, -1)
-
-        return self.tokenizer.decode(ids)[0]
-
-    def get_logits(self, token_ids, **kwargs):
-        self.cache.current_seq_len = 0
-        self.model.forward(token_ids[:, :-1], self.cache, input_mask=None, preprocess_only=True)
-        return self.model.forward(token_ids[:, -1:], self.cache, **kwargs).float().cpu()
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -62,6 +62,22 @@ class Exllamav2Model:
        result.generator = generator
        return result, result

+    def encode(self, string, **kwargs):
+        return self.tokenizer.encode(string, add_bos=True)
+
+    def decode(self, ids, **kwargs):
+        if isinstance(ids, list):
+            ids = torch.tensor([ids])
+        elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
+            ids = ids.view(1, -1)
+
+        return self.tokenizer.decode(ids)[0]
+
+    def get_logits(self, token_ids, **kwargs):
+        self.cache.current_seq_len = 0
+        self.model.forward(token_ids[:, :-1], self.cache, input_mask=None, preprocess_only=True)
+        return self.model.forward(token_ids[:, -1:], self.cache, input_mask=None, **kwargs).float().cpu()
+
    def generate_with_streaming(self, prompt, state):
        settings = ExLlamaV2Sampler.Settings()
        settings.temperature = state['temperature']
@@ -114,19 +130,3 @@ class Exllamav2Model:
            pass

        return output
-
-    def encode(self, string, **kwargs):
-        return self.tokenizer.encode(string, add_bos=True)
-
-    def decode(self, ids, **kwargs):
-        if isinstance(ids, list):
-            ids = torch.tensor([ids])
-        elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
-            ids = ids.view(1, -1)
-
-        return self.tokenizer.decode(ids)[0]
-
-    def get_logits(self, token_ids, **kwargs):
-        self.cache.current_seq_len = 0
-        self.model.forward(token_ids[:, :-1], self.cache, input_mask=None, preprocess_only=True)
-        return self.model.forward(token_ids[:, -1:], self.cache, input_mask=None, **kwargs).float().cpu()
--- a/modules/models.py
+++ b/modules/models.py
@@ -1,5 +1,4 @@
 import gc
-import hashlib
 import os
 import re
 import time