2023-09-27 02:30:19 +02:00
|
|
|
"""
|
|
|
|
This module is responsible for handling and modifying the notebook text.
|
|
|
|
"""
|
|
|
|
import re
|
|
|
|
|
|
|
|
import extensions.superboogav2.parameters as parameters
|
|
|
|
from extensions.superboogav2.utils import create_context_text
|
2024-03-07 19:03:18 +01:00
|
|
|
from modules.logging_colors import logger
|
2023-09-27 02:30:19 +02:00
|
|
|
|
|
|
|
from .data_processor import preprocess_text
|
|
|
|
|
2024-03-07 19:03:18 +01:00
|
|
|
|
2023-09-27 02:30:19 +02:00
|
|
|
def _remove_special_tokens(string):
|
|
|
|
pattern = r'(<\|begin-user-input\|>|<\|end-user-input\|>|<\|injection-point\|>)'
|
|
|
|
return re.sub(pattern, '', string)
|
|
|
|
|
|
|
|
|
2023-12-31 06:03:23 +01:00
|
|
|
def input_modifier_internal(string, collector, is_chat):
    """Inject retrieved context into a notebook prompt.

    Args:
        string: The prompt text, optionally containing the
            ``<|begin-user-input|>`` / ``<|end-user-input|>`` markers and one
            or more ``<|injection-point|>`` markers.
        collector: Object exposing ``get_sorted_by_dist(query, n_results=...,
            max_token_count=...)``; its result is handed to
            ``create_context_text``.
        is_chat: When truthy the prompt is returned untouched.

    Returns:
        ``string`` unchanged in chat mode. Otherwise the prompt with every
        special marker removed and, if a user-input span was found, each
        ``<|injection-point|>`` replaced by the generated context text.
    """
    # Sanity check: this handler does nothing for chat prompts.
    if is_chat:
        return string

    # Locate the text enclosed by the user-input markers (may span lines).
    user_input_re = re.compile(r"<\|begin-user-input\|>(.*?)<\|end-user-input\|>", re.DOTALL)
    found = user_input_re.search(string)

    # Without a user-input span there is nothing to query with; just strip
    # the markers.
    if not found:
        return _remove_special_tokens(string)

    # Preprocess the user prompt before using it as the query.
    user_input = preprocess_text(found.group(1).strip())

    logger.debug(f"Preprocessed User Input: {user_input}")

    # Fetch the most similar chunks within the configured limits.
    chunks = collector.get_sorted_by_dist(
        user_input,
        n_results=parameters.get_chunk_count(),
        max_token_count=int(parameters.get_max_token_count()),
    )

    # Substitute the context text at every injection point, then drop the
    # remaining markers.
    injected = string.replace('<|injection-point|>', create_context_text(chunks))
    return _remove_special_tokens(injected)
|