Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-25 17:29:22 +01:00)
Fix llama.cpp truncation (#3400)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
parent 4e6dc6d99d
commit f4005164f4
modules/llamacpp_model.py

@@ -6,6 +6,7 @@ import torch
 from modules import shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
+from modules.text_generation import get_max_prompt_length
 
 import llama_cpp
 
@@ -91,6 +92,12 @@ class LlamaCppModel:
         LogitsProcessorList = llama_cpp_lib().LogitsProcessorList
 
         prompt = prompt if type(prompt) is str else prompt.decode()
+
+        # Handle truncation
+        prompt = self.encode(prompt)
+        prompt = prompt[-get_max_prompt_length(state):]
+        prompt = self.decode(prompt).decode('utf-8')
+
         completion_chunks = self.model.create_completion(
             prompt=prompt,
             max_tokens=state['max_new_tokens'],
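The added block truncates at the token level: the prompt is encoded to tokens, only the newest get_max_prompt_length(state) tokens are kept, and the result is decoded back to text before being passed to create_completion. Below is a minimal runnable sketch of that pattern, with a toy whitespace tokenizer standing in for the real llama.cpp encode/decode; all names are illustrative, not the repo's API.

class ToyTokenizer:
    """Whitespace 'tokenizer', just to make the sketch executable."""
    def encode(self, text):
        return text.split()

    def decode(self, tokens):
        return ' '.join(tokens)


def truncate_prompt(prompt, tokenizer, max_prompt_tokens):
    # Slice the *token* sequence, not the character string, so the
    # result fits the model's context window regardless of how
    # characters map to tokens.
    tokens = tokenizer.encode(prompt)       # text -> tokens
    tokens = tokens[-max_prompt_tokens:]    # keep only the newest tokens
    return tokenizer.decode(tokens)         # tokens -> text


print(truncate_prompt("a b c d e f", ToyTokenizer(), 3))  # -> "d e f"

Slicing from the tail (negative index) matches the hunk above: when a chat history outgrows the context, the oldest tokens are dropped and the most recent ones are kept.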
modules/text_generation.py

@@ -39,7 +39,6 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
     if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel']:
         input_ids = shared.tokenizer.encode(str(prompt))
         input_ids = np.array(input_ids).reshape(1, len(input_ids))
-        return input_ids
     else:
         input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
 
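Dropping the early return means the LlamaCppModel branch no longer exits encode() at the end of the if-branch; presumably this lets the later truncation_length handling in the rest of the function apply to llama.cpp token ids as well. A hedged sketch of that fall-through, assuming a trailing input_ids[:, -truncation_length:] slice (an assumption based on the function's truncation_length parameter, which is not shown in this diff); the helper names are hypothetical.

import numpy as np

def encode_llamacpp(prompt, tokenizer, truncation_length=None):
    # llama.cpp branch of encode() after the fix: build the (1, n) id
    # array, then fall through to the shared truncation instead of
    # returning early (which skipped it before this commit).
    ids = tokenizer.encode(str(prompt))
    input_ids = np.array(ids).reshape(1, len(ids))  # batch shape (1, n)
    if truncation_length is not None:
        input_ids = input_ids[:, -truncation_length:]  # keep newest tokens
    return input_ids


class ToyTokenizer:
    """Stand-in tokenizer so the sketch runs without llama.cpp."""
    def encode(self, text):
        return list(range(len(text.split())))


print(encode_llamacpp("one two three four", ToyTokenizer(), truncation_length=2))
# -> [[2 3]]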