diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index 3ae02e68..af7b094f 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -103,10 +103,11 @@ class ChatCompletionRequestParams(BaseModel):
     instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")
 
     character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
-    user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
     bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
     context: str | None = Field(default=None, description="Overwrites the value set by character field.")
     greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
+    user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
+    user_bio: str | None = Field(default=None, description="The user description/personality.")
     chat_template_str: str | None = Field(default=None, description="Jinja2 template for chat.")
 
     chat_instruct_command: str | None = None
diff --git a/modules/cache_utils.py b/modules/cache_utils.py
index 3f5a0f31..0d1368a2 100644
--- a/modules/cache_utils.py
+++ b/modules/cache_utils.py
@@ -19,28 +19,29 @@ def process_llamacpp_cache(model, new_sequence, past_sequence):
     past_sequence = torch.tensor(past_sequence)
 
     prefix_length = find_prefix_length(past_sequence[:i1], new_sequence[:j1])
-    sink_length = prefix_length
-    if sink_length < shared.args.attention_sink_size:
-        sink_length = shared.args.attention_sink_size
-
+    sink_length = max(prefix_length, shared.args.attention_sink_size)
     removed_length = i1 - sink_length
 
+    if removed_length <= 0:
+        return past_sequence.tolist()
+
     matching_prefix = past_sequence[:prefix_length]
     removed_chunk = past_sequence[sink_length:i1]
     overlapping_sequence = new_sequence[j1:j2 + 1]
     added_chunk = new_sequence[j2 + 1:]
 
-    # print(past_sequence)
-    # print(new_sequence)
+    # print(past_sequence.tolist())
+    # print(new_sequence.tolist())
     print()
     print('MATCHING PREFIX=', repr(shared.tokenizer.decode(matching_prefix)))
     print('ADDED CHUNK=', repr(shared.tokenizer.decode(added_chunk)))
     print('REMOVED CHUNK=', repr(shared.tokenizer.decode(removed_chunk)))
+    print('REMOVED LENGTH=', removed_length)
     print()
 
     # Remove interval [sink_length, sink_length + removed_length) from the context
-    # Subtract removed_length from model.n_tokens
+    # Update model.n_tokens
     model._ctx.kv_cache_seq_rm(0, sink_length, sink_length + removed_length)
     model._ctx.kv_cache_seq_shift(0, sink_length + removed_length, -1, -removed_length)
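A note on the cache_utils.py change for reviewers who don't have the StreamingLLM path paged in: the shared prefix of the old prompt is kept, a minimum number of "attention sink" tokens is always preserved, and the function now bails out early when that leaves nothing to evict. A minimal sketch of the same arithmetic on plain Python lists (the helper name, the toy sequences, and the sink size of 4 are invented for illustration; the real code operates on the llama.cpp KV cache):

```python
def trim_plan(past, new, i1, j1, attention_sink_size=4):
    """Return (sink_length, removed_length) for a StreamingLLM-style trim.

    past[:i1] and new[:j1] are the regions being compared; tokens in
    [sink_length, i1) would be evicted from the cache.
    """
    # Length of the shared prefix between the old and new prompts
    prefix_length = 0
    for a, b in zip(past[:i1], new[:j1]):
        if a != b:
            break
        prefix_length += 1

    # Never evict the first few "sink" tokens, even if the prefixes diverge early
    sink_length = max(prefix_length, attention_sink_size)
    removed_length = i1 - sink_length

    # Same early exit as the patch: nothing (or a negative amount) to remove
    if removed_length <= 0:
        return sink_length, 0

    return sink_length, removed_length


# Toy example: the old prompt diverges from the new one after 2 tokens
print(trim_plan(past=[1, 2, 3, 4, 5, 6], new=[1, 2, 9, 9], i1=6, j1=4))  # (4, 2)
```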
diff --git a/modules/chat.py b/modules/chat.py
index a1fcb6b0..c8516c59 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -86,10 +86,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
     if state['mode'] != 'instruct':
         chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])
 
-    chat_template = jinja_env.from_string(chat_template_str)
     instruction_template = jinja_env.from_string(state['instruction_template_str'])
-    chat_renderer = partial(chat_template.render, add_generation_prompt=False, name1=state['name1'], name2=state['name2'])
     instruct_renderer = partial(instruction_template.render, add_generation_prompt=False)
+    chat_template = jinja_env.from_string(chat_template_str)
+    chat_renderer = partial(
+        chat_template.render,
+        add_generation_prompt=False,
+        name1=state['name1'],
+        name2=state['name2'],
+        user_bio=replace_character_names(state['user_bio'], state['name1'], state['name2']),
+    )
 
     messages = []
 
@@ -99,7 +105,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
             messages.append({"role": "system", "content": state['custom_system_message']})
     else:
         renderer = chat_renderer
-        if state['context'].strip() != '':
+        if state['context'].strip() != '' or state['user_bio'].strip() != '':
             context = replace_character_names(state['context'], state['name1'], state['name2'])
             messages.append({"role": "system", "content": context})
 
@@ -140,6 +146,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
         command = state['chat-instruct_command']
         command = command.replace('<|character|>', state['name2'] if not impersonate else state['name1'])
         command = command.replace('<|prompt|>', prompt)
+        command = replace_character_names(command, state['name1'], state['name2'])
 
         if _continue:
            prefix = get_generation_prompt(renderer, impersonate=impersonate, strip_trailing_spaces=False)[0]
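The practical effect of the chat.py change: `user_bio` is now passed to the chat template alongside `name1`/`name2`, and a non-empty user description alone is enough to emit a system message. A rough sketch of that rendering path with a simplified stand-in template (the template string and sample values below are invented; the project's real default template lives in shared.py and settings-template.yaml):

```python
from functools import partial

from jinja2 import Environment

# Simplified stand-in for the default chat template: the system block is
# emitted from the character context and/or the user description.
template_str = (
    "{%- for message in messages %}"
    "{%- if message['role'] == 'system' %}"
    "{%- if message['content'] %}{{ message['content'] }}\n\n{% endif %}"
    "{%- if user_bio %}{{ user_bio }}\n\n{% endif %}"
    "{%- else %}"
    "{{ name1 if message['role'] == 'user' else name2 }}: {{ message['content'] }}\n"
    "{%- endif %}"
    "{%- endfor %}"
)

chat_template = Environment().from_string(template_str)
chat_renderer = partial(
    chat_template.render,
    name1='You',
    name2='Assistant',
    user_bio='A terse, curious developer.',
)

messages = [
    {"role": "system", "content": ""},  # empty character context; the bio still appears
    {"role": "user", "content": "Hello!"},
]
print(chat_renderer(messages=messages))
```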
diff --git a/modules/html_generator.py b/modules/html_generator.py
index e3dd453e..278f1632 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -1,3 +1,4 @@
+import functools
 import html
 import os
 import re
@@ -47,6 +48,7 @@ def replace_blockquote(m):
     return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
 
 
+@functools.lru_cache(maxsize=4096)
 def convert_to_markdown(string):
 
     # Blockquote
@@ -99,6 +101,17 @@ def convert_to_markdown(string):
     return html_output
 
 
+def convert_to_markdown_wrapped(string, use_cache=True):
+    '''
+    Used to avoid caching convert_to_markdown calls during streaming.
+    '''
+
+    if use_cache:
+        return convert_to_markdown(string)
+
+    return convert_to_markdown.__wrapped__(string)
+
+
 def generate_basic_html(string):
     string = convert_to_markdown(string)
     string = f'{string}'
@@ -194,7 +207,7 @@ def get_image_cache(path):
 def generate_instruct_html(history):
     output = f''
     for i, _row in enumerate(history):
-        row = [convert_to_markdown(entry) for entry in _row]
+        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
 
         if row[0]:  # don't display empty user messages
             output += f"""
@@ -230,7 +243,7 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
     img_me = f'' if Path("cache/pfp_me.png").exists() else ''
 
     for i, _row in enumerate(history):
-        row = [convert_to_markdown(entry) for entry in _row]
+        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
 
         if row[0]:  # don't display empty user messages
             output += f"""
@@ -273,7 +286,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
     output = f''
     for i, _row in enumerate(history):
-        row = [convert_to_markdown(entry) for entry in _row]
+        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
 
         if row[0]:  # don't display empty user messages
             output += f"""
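The html_generator.py change leans on a standard functools detail: `lru_cache` keeps the undecorated function available as `__wrapped__`, so finished messages can be converted through the cache while the still-streaming last message bypasses it. A small standalone illustration of the pattern (the `render` stand-in and the call counter are invented for the example):

```python
import functools

CALLS = {"render": 0}


@functools.lru_cache(maxsize=4096)
def render(text):
    """Stand-in for an expensive markdown -> HTML conversion."""
    CALLS["render"] += 1
    return f"<p>{text}</p>"


def render_wrapped(text, use_cache=True):
    # Bypass the cache for text that is still changing (e.g. a streaming reply)
    if use_cache:
        return render(text)

    return render.__wrapped__(text)


render_wrapped("hello")                 # computed and cached
render_wrapped("hello")                 # served from the cache
render_wrapped("hel", use_cache=False)  # computed, but not cached
render_wrapped("hel", use_cache=False)  # computed again
print(CALLS["render"])                  # 3
```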
diff --git a/modules/shared.py b/modules/shared.py
index 69ad0cfd..c2a44eb8 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -57,9 +57,10 @@ settings = {
     'stream': True,
     'character': 'Assistant',
     'name1': 'You',
+    'user_bio': '',
     'custom_system_message': '',
     'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n    {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' -%}\n        {{- '' + message['content'] + '\\n\\n' -}}\n    {%- else -%}\n        {%- if message['role'] == 'user' -%}\n            {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n        {%- else -%}\n            {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n        {%- endif -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{-'### Response:\\n'-}}\n{%- endif -%}",
-    'chat_template_str': "{%- for message in messages %}\n    {%- if message['role'] == 'system' -%}\n        {{- message['content'] + '\\n\\n' -}}\n    {%- else -%}\n        {%- if message['role'] == 'user' -%}\n            {{- name1 + ': ' + message['content'] + '\\n'-}}\n        {%- else -%}\n            {{- name2 + ': ' + message['content'] + '\\n' -}}\n        {%- endif -%}\n    {%- endif -%}\n{%- endfor -%}",
+    'chat_template_str': "{%- for message in messages %}\n    {%- if message['role'] == 'system' -%}\n        {%- if message['content'] -%}\n            {{- message['content'] + '\\n\\n' -}}\n        {%- endif -%}\n        {%- if user_bio -%}\n            {{- user_bio + '\\n\\n' -}}\n        {%- endif -%}\n    {%- else -%}\n        {%- if message['role'] == 'user' -%}\n            {{- name1 + ': ' + message['content'] + '\\n'-}}\n        {%- else -%}\n            {{- name2 + ': ' + message['content'] + '\\n' -}}\n        {%- endif -%}\n    {%- endif -%}\n{%- endfor -%}",
     'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
     'autoload_model': False,
     'gallery-items_per_page': 50,
diff --git a/modules/ui.py b/modules/ui.py
index 4a03f843..f973fa6f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -35,7 +35,8 @@ theme = gr.themes.Default(
     border_color_primary='#c5c5d2',
     button_large_padding='6px 12px',
     body_text_color_subdued='#484848',
-    background_fill_secondary='#eaeaea'
+    background_fill_secondary='#eaeaea',
+    background_fill_primary='#fafafa',
 )
 
 if Path("notification.mp3").exists():
@@ -170,6 +171,7 @@ def list_interface_input_elements():
         'character_menu',
         'history',
         'name1',
+        'user_bio',
         'name2',
         'greeting',
         'context',
@@ -220,7 +222,7 @@ def apply_interface_values(state, use_persistent=False):
 
 def save_settings(state, preset, extensions_list, show_controls, theme_state):
     output = copy.deepcopy(shared.settings)
-    exclude = ['name2', 'greeting', 'context', 'turn_template']
+    exclude = ['name2', 'greeting', 'context', 'turn_template', 'truncation_length']
     for k in state:
         if k in shared.settings and k not in exclude:
             output[k] = state[k]
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 7255bb99..293d253e 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -94,19 +94,50 @@ def create_ui():
 def create_chat_settings_ui():
     mu = shared.args.multi_user
 
-    with gr.Tab('Character'):
+    with gr.Tab('Chat'):
         with gr.Row():
             with gr.Column(scale=8):
-                with gr.Row():
-                    shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
-                    ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
-                    shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
-                    shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+                with gr.Tab("Character"):
+                    with gr.Row():
+                        shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+                        ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
+                        shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+                        shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
 
-                shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name')
-                shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
-                shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
-                shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+                    shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
+                    shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
+                    shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+
+                with gr.Tab("User"):
+                    shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
+                    shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'])
+
+                with gr.Tab('Chat history'):
+                    with gr.Row():
+                        with gr.Column():
+                            shared.gradio['save_chat_history'] = gr.Button(value='Save history')
+
+                        with gr.Column():
+                            shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
+
+                with gr.Tab('Upload character'):
+                    with gr.Tab('YAML or JSON'):
+                        with gr.Row():
+                            shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
+                            shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)
+
+                        shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
+
+                    with gr.Tab('TavernAI PNG'):
+                        with gr.Row():
+                            with gr.Column():
+                                shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
+                                shared.gradio['tavern_json'] = gr.State()
+                            with gr.Column():
+                                shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
+                                shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)
+
+                        shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
 
             with gr.Column(scale=1):
                 shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil', interactive=not mu)
@@ -137,33 +168,6 @@ def create_chat_settings_ui():
         with gr.Column():
             shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'])
 
-    with gr.Tab('Chat history'):
-        with gr.Row():
-            with gr.Column():
-                shared.gradio['save_chat_history'] = gr.Button(value='Save history')
-
-            with gr.Column():
-                shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
-
-    with gr.Tab('Upload character'):
-        with gr.Tab('YAML or JSON'):
-            with gr.Row():
-                shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
-                shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)
-
-            shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
-
-        with gr.Tab('TavernAI PNG'):
-            with gr.Row():
-                with gr.Column():
-                    shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
-                    shared.gradio['tavern_json'] = gr.State()
-                with gr.Column():
-                    shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
-                    shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)
-
-            shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
-
 
 def create_event_handlers():
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index d268770a..a31bbcf5 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -90,7 +90,7 @@ def create_ui():
                             shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type)
                             shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
 
-                            shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers)
+                            shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
                             shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
                             shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17')
                             shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
@@ -118,7 +118,7 @@ def create_ui():
                             shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
                             shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
                             shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
-                            shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
+                            shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
                             shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
                             shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
                             shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) - shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers) + shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.') shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.') shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17') shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch) @@ -118,7 +118,7 @@ def create_ui(): shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') - shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.') + shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.') shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.') shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.') shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') diff --git a/settings-template.yaml b/settings-template.yaml index 87101116..bf057be7 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -54,7 +54,12 @@ instruction_template_str: |- chat_template_str: |- {%- for message in messages %} {%- if message['role'] == 'system' -%} - {{- message['content'] + '\n\n' -}} + {%- if message['content'] -%} + {{- message['content'] + '\n\n' -}} + {%- endif -%} + {%- if user_bio -%} + {{- user_bio + '\n\n' -}} + {%- endif -%} {%- else -%} {%- if message['role'] == 'user' -%} {{- name1 + ': ' + message['content'] + '\n'-}}