From ad8ac545a58fde3a26bbe3a04489131fe1b3ec88 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 17 Sep 2023 07:01:34 -0700
Subject: [PATCH] Tokenization improvements

---
 modules/exllama.py         |  4 ++--
 modules/exllamav2.py       |  4 ++--
 modules/llamacpp_model.py  |  4 ++--
 modules/logits.py          |  7 ++-----
 modules/text_generation.py | 15 +++++++++++----
 5 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/modules/exllama.py b/modules/exllama.py
index f5cd2ae7..7f32c746 100644
--- a/modules/exllama.py
+++ b/modules/exllama.py
@@ -202,8 +202,8 @@ class ExllamaModel:
         return self.tokenizer.encode(string, max_seq_len=self.model.config.max_seq_len, add_bos=True)
 
     def decode(self, ids, **kwargs):
-        if isinstance(ids, int):
-            ids = torch.tensor([[ids]])
+        if isinstance(ids, list):
+            ids = torch.tensor([ids])
         elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
             ids = ids.view(1, -1)
 
diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 55903d80..9d2a0fc4 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -107,8 +107,8 @@ class Exllamav2Model:
         return self.tokenizer.encode(string, add_bos=True)
 
     def decode(self, ids, **kwargs):
-        if isinstance(ids, int):
-            ids = torch.tensor([[ids]])
+        if isinstance(ids, list):
+            ids = torch.tensor([ids])
         elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
             ids = ids.view(1, -1)
 
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index fc3a79f5..ed1f5b9e 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -98,8 +98,8 @@ class LlamaCppModel:
 
         return self.model.tokenize(string)
 
-    def decode(self, tokens):
-        return self.model.detokenize(tokens)
+    def decode(self, ids):
+        return self.model.detokenize(ids).decode('utf-8')
 
     def get_logits(self, tokens):
         self.model.eval(tokens)
diff --git a/modules/logits.py b/modules/logits.py
index d3b36a44..d95dd8c0 100644
--- a/modules/logits.py
+++ b/modules/logits.py
@@ -46,17 +46,14 @@ def get_next_logits(prompt, state, use_samplers, previous):
             scores = output['logits'][-1][-1]
 
     probs = torch.softmax(scores, dim=-1, dtype=torch.float)
-    topk_values, topk_indices = torch.topk(probs, k=25, largest=True, sorted=True)
+    topk_values, topk_indices = torch.topk(probs, k=50, largest=True, sorted=True)
     topk_values = [f"{float(i):.5f}" for i in topk_values]
     if is_non_hf_exllamav1 or is_non_hf_llamacpp:
         topk_indices = [i.expand((1, 1)) for i in topk_indices]
 
     tokens = [shared.tokenizer.decode(i) for i in topk_indices]
-    if is_non_hf_llamacpp:
-        tokens = [i.decode('utf-8') for i in tokens]  # llamacpp returns bytes, not str
-
     output = ''
     for row in list(zip(topk_values, tokens)):
-        output += f"{row[0]} - {repr(row[1])[1:-1]}\n"
+        output += f"{row[0]} - {repr(row[1])}\n"
 
     return output, previous
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 41f12ddc..5296c1b4 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -39,8 +39,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
     if generate_func is None:
         if shared.model_name == 'None' or shared.model is None:
             logger.error("No model is loaded! Select one in the Model tab.")
-            yield ''
-            return
+            raise ValueError('No model is loaded! Select one in the Model tab.')
 
         if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'Exllamav2Model', 'CtransformersModel']:
             generate_func = generate_reply_custom
@@ -106,6 +105,10 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
 
 
 def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
+    if shared.tokenizer is None:
+        logger.error('No tokenizer is loaded')
+        raise ValueError('No tokenizer is loaded')
+
     if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'CtransformersModel', 'Exllamav2Model']:
         input_ids = shared.tokenizer.encode(str(prompt))
         if shared.model.__class__.__name__ not in ['Exllamav2Model']:
@@ -133,6 +136,10 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
 
 
 def decode(output_ids, skip_special_tokens=True):
+    if shared.tokenizer is None:
+        logger.error('No tokenizer is loaded')
+        raise ValueError('No tokenizer is loaded')
+
     return shared.tokenizer.decode(output_ids, skip_special_tokens)
 
 
@@ -146,11 +153,11 @@ def get_encoded_length(prompt):
 
 def get_token_ids(prompt):
     tokens = encode(prompt)[0]
-    decoded_tokens = [shared.tokenizer.decode(i) for i in tokens]
+    decoded_tokens = [shared.tokenizer.decode([i]) for i in tokens]
 
     output = ''
     for row in list(zip(tokens, decoded_tokens)):
-        output += f"{str(int(row[0])).ljust(5)} - {repr(row[1])[1:-1]}\n"
+        output += f"{str(int(row[0])).ljust(5)} - {repr(row[1])}\n"
 
     return output
 
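
Note for reviewers (not part of the patch): the sketch below reproduces, outside the webui, the per-token decoding and repr() formatting that get_token_ids() now uses, so the effect of decoding one id at a time and keeping the quotes around each piece can be checked in isolation. It assumes a standalone Hugging Face tokenizer is available; the "gpt2" checkpoint and the sample prompt are illustrative choices only, not anything this patch depends on.

# Illustrative sketch only -- mirrors the updated get_token_ids() formatting
# with a standalone tokenizer instead of shared.tokenizer.
from transformers import AutoTokenizer  # assumption: transformers is installed

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # assumption: any HF tokenizer works here

prompt = "Hello world"
token_ids = tokenizer.encode(prompt)

output = ''
for token_id in token_ids:
    # Decode each id as a one-element list, as the patched code does with
    # shared.tokenizer.decode([i]), so every token is rendered on its own.
    piece = tokenizer.decode([token_id])
    # repr() keeps the surrounding quotes, so leading spaces and escape
    # sequences such as \n stay visible in the listing.
    output += f"{str(token_id).ljust(5)} - {repr(piece)}\n"

print(output)

Keeping the quotes makes token boundaries and whitespace unambiguous in the printed list, which is what the switch from repr(row[1])[1:-1] to repr(row[1]) accomplishes in both modules/logits.py and modules/text_generation.py.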