Fix llama.cpp truncation (#3400)

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
Authored by Pete on 2023-08-03 19:01:15 -04:00, committed by GitHub
parent 4e6dc6d99d
commit f4005164f4
2 changed files with 7 additions and 1 deletion


@@ -6,6 +6,7 @@ import torch
 from modules import shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
+from modules.text_generation import get_max_prompt_length
 import llama_cpp
@@ -91,6 +92,12 @@ class LlamaCppModel:
         LogitsProcessorList = llama_cpp_lib().LogitsProcessorList
 
         prompt = prompt if type(prompt) is str else prompt.decode()
+
+        # Handle truncation
+        prompt = self.encode(prompt)
+        prompt = prompt[-get_max_prompt_length(state):]
+        prompt = self.decode(prompt).decode('utf-8')
+
         completion_chunks = self.model.create_completion(
             prompt=prompt,
             max_tokens=state['max_new_tokens'],
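
For reference, a minimal, self-contained sketch of the truncation this hunk introduces: encode the prompt, keep only the last get_max_prompt_length(state) tokens, and decode back to a string before it reaches create_completion(). The whitespace "tokenizer" and the get_max_prompt_length stub below are illustrative stand-ins; the stub assumes the prompt budget is truncation_length minus max_new_tokens, which may not match the helper imported from modules.text_generation exactly.

# Illustrative stub; the real helper is imported from modules.text_generation
# and may be defined differently.
def get_max_prompt_length(state):
    # Assumption: reserve room for the tokens that will be generated.
    return state['truncation_length'] - state['max_new_tokens']

def truncate_prompt(prompt, state, encode, decode):
    # Mirrors the patch: tokenize, keep the last N tokens, detokenize.
    tokens = encode(prompt)
    tokens = tokens[-get_max_prompt_length(state):]
    return decode(tokens)

def toy_encode(text):
    # Whitespace "tokenizer" so the sketch runs without loading a model.
    return text.split()

def toy_decode(tokens):
    return ' '.join(tokens)

state = {'truncation_length': 8, 'max_new_tokens': 3}
long_prompt = ' '.join(f'tok{i}' for i in range(20))
print(truncate_prompt(long_prompt, state, toy_encode, toy_decode))
# -> 'tok15 tok16 tok17 tok18 tok19' (the last 8 - 3 = 5 tokens)

Slicing from the end keeps the tail of the prompt, so the most recent context survives when the prompt overflows the model's window.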


@@ -39,7 +39,6 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length
     if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel']:
         input_ids = shared.tokenizer.encode(str(prompt))
         input_ids = np.array(input_ids).reshape(1, len(input_ids))
-        return input_ids
     else:
         input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
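
A note on why dropping this early return appears safe: the LlamaCppModel/RWKVModel branch produces a (1, n) numpy array of token ids, and such an array supports the same kind of last-N column slicing that encode() presumably applies further down before returning, so the ids can now fall through to that shared handling instead of bypassing it. The snippet below only illustrates that slicing; the actual downstream code is not part of this diff.

import numpy as np

# Toy ids standing in for shared.tokenizer.encode(str(prompt)).
token_ids = list(range(12))

# Same reshape as in the hunk above: a (1, n) row vector of ids.
input_ids = np.array(token_ids).reshape(1, len(token_ids))

# An illustrative last-N slice, the kind of truncation the ids can now
# reach once the early return is removed.
truncation_length = 8
input_ids = input_ids[:, -truncation_length:]

print(input_ids)        # [[ 4  5  6  7  8  9 10 11]]
print(input_ids.shape)  # (1, 8)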