Merge pull request #5716 from oobabooga/dev

Merge dev branch
oobabooga 2024-03-17 12:34:53 -03:00 committed by GitHub
commit 7cf1402bde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 91 additions and 57 deletions

View File

@@ -103,10 +103,11 @@ class ChatCompletionRequestParams(BaseModel):
     instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")
     character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
-    user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
     bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
     context: str | None = Field(default=None, description="Overwrites the value set by character field.")
     greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
+    user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
+    user_bio: str | None = Field(default=None, description="The user description/personality.")
     chat_template_str: str | None = Field(default=None, description="Jinja2 template for chat.")
     chat_instruct_command: str | None = None
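
The new user_bio field is a request parameter of the OpenAI-compatible /v1/chat/completions endpoint, next to the existing character overrides. A minimal sketch of a request that sets it, assuming the API extension is reachable on its default port 5000 with a model already loaded (the URL and example values are assumptions, not part of the diff):

import requests

# Minimal sketch of a chat completion request that sets the new user_bio field.
payload = {
    "mode": "chat",
    "character": "Assistant",
    "user_name": "You",
    "user_bio": "A curious developer who prefers short, direct answers.",
    "messages": [{"role": "user", "content": "Introduce yourself."}],
}

response = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload, timeout=60)
print(response.json()["choices"][0]["message"]["content"])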

View File

@@ -19,28 +19,29 @@ def process_llamacpp_cache(model, new_sequence, past_sequence):
     past_sequence = torch.tensor(past_sequence)
     prefix_length = find_prefix_length(past_sequence[:i1], new_sequence[:j1])
-    sink_length = prefix_length
-    if sink_length < shared.args.attention_sink_size:
-        sink_length = shared.args.attention_sink_size
+    sink_length = max(prefix_length, shared.args.attention_sink_size)
     removed_length = i1 - sink_length
+    if removed_length <= 0:
+        return past_sequence.tolist()
     matching_prefix = past_sequence[:prefix_length]
     removed_chunk = past_sequence[sink_length:i1]
     overlapping_sequence = new_sequence[j1:j2 + 1]
     added_chunk = new_sequence[j2 + 1:]
-    # print(past_sequence)
-    # print(new_sequence)
+    # print(past_sequence.tolist())
+    # print(new_sequence.tolist())
     print()
     print('MATCHING PREFIX=', repr(shared.tokenizer.decode(matching_prefix)))
     print('ADDED CHUNK=', repr(shared.tokenizer.decode(added_chunk)))
     print('REMOVED CHUNK=', repr(shared.tokenizer.decode(removed_chunk)))
+    print('REMOVED LENGTH=', removed_length)
     print()
     # Remove interval [sink_length, sink_length + removed_length) from the context
-    # Subtract removed_length from model.n_tokens
+    # Update model.n_tokens
     model._ctx.kv_cache_seq_rm(0, sink_length, sink_length + removed_length)
     model._ctx.kv_cache_seq_shift(0, sink_length + removed_length, -1, -removed_length)
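
To make the new arithmetic concrete, here is a small worked example with made-up numbers; find_prefix_length and the llama.cpp cache calls are left out, and only the sink/removal bookkeeping from the diff is reproduced:

# Made-up numbers to illustrate the trimming bookkeeping above.
attention_sink_size = 5   # sink tokens always kept at the start of the cache
prefix_length = 3         # shared prefix between the old and new prompts
i1 = 40                   # end of the divergent region in the old (cached) sequence

# New behaviour: the sink can never be smaller than attention_sink_size.
sink_length = max(prefix_length, attention_sink_size)  # -> 5

# Tokens in [sink_length, i1) are candidates for eviction from the KV cache.
removed_length = i1 - sink_length  # -> 35

# New guard: if nothing would be removed, reuse the old cache as-is instead of
# issuing kv_cache_seq_rm / kv_cache_seq_shift calls.
if removed_length <= 0:
    print("cache left untouched")
else:
    print(f"evict [{sink_length}, {sink_length + removed_length}), shift the tail back by {removed_length}")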

View File

@@ -86,10 +86,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
     if state['mode'] != 'instruct':
         chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])
-    chat_template = jinja_env.from_string(chat_template_str)
     instruction_template = jinja_env.from_string(state['instruction_template_str'])
-    chat_renderer = partial(chat_template.render, add_generation_prompt=False, name1=state['name1'], name2=state['name2'])
     instruct_renderer = partial(instruction_template.render, add_generation_prompt=False)
+    chat_template = jinja_env.from_string(chat_template_str)
+    chat_renderer = partial(
+        chat_template.render,
+        add_generation_prompt=False,
+        name1=state['name1'],
+        name2=state['name2'],
+        user_bio=replace_character_names(state['user_bio'], state['name1'], state['name2']),
+    )
     messages = []
@@ -99,7 +105,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
             messages.append({"role": "system", "content": state['custom_system_message']})
     else:
         renderer = chat_renderer
-        if state['context'].strip() != '':
+        if state['context'].strip() != '' or state['user_bio'].strip() != '':
             context = replace_character_names(state['context'], state['name1'], state['name2'])
             messages.append({"role": "system", "content": context})
@@ -140,6 +146,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
         command = state['chat-instruct_command']
         command = command.replace('<|character|>', state['name2'] if not impersonate else state['name1'])
         command = command.replace('<|prompt|>', prompt)
+        command = replace_character_names(command, state['name1'], state['name2'])
         if _continue:
             prefix = get_generation_prompt(renderer, impersonate=impersonate, strip_trailing_spaces=False)[0]
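
For reference, the chat renderer built above is just a functools.partial around a Jinja2 template render, with user_bio bound as one more template variable. A self-contained sketch of the same pattern, using a simplified template and a stand-in for replace_character_names (the template text and the names below are made up):

from functools import partial

from jinja2 import Environment

jinja_env = Environment()

def replace_character_names(text, name1, name2):
    # Simplified stand-in: swap the {{user}}/{{char}} placeholders for real names.
    return text.replace('{{user}}', name1).replace('{{char}}', name2)

chat_template = jinja_env.from_string(
    "{% if user_bio %}{{ user_bio }}\n\n{% endif %}"
    "{% for m in messages %}{{ name1 if m['role'] == 'user' else name2 }}: {{ m['content'] }}\n{% endfor %}"
)

# Static values are bound once, as in the diff; only messages changes per call.
chat_renderer = partial(
    chat_template.render,
    name1='Alice',
    name2='Bot',
    user_bio=replace_character_names("{{user}} is a test user.", 'Alice', 'Bot'),
)

print(chat_renderer(messages=[{'role': 'user', 'content': 'hi'}]))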

View File

@@ -1,3 +1,4 @@
+import functools
 import html
 import os
 import re
@@ -47,6 +48,7 @@ def replace_blockquote(m):
     return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
+@functools.lru_cache(maxsize=4096)
 def convert_to_markdown(string):
     # Blockquote
@@ -99,6 +101,17 @@ def convert_to_markdown(string):
     return html_output
+def convert_to_markdown_wrapped(string, use_cache=True):
+    '''
+    Used to avoid caching convert_to_markdown calls during streaming.
+    '''
+    if use_cache:
+        return convert_to_markdown(string)
+    return convert_to_markdown.__wrapped__(string)
 def generate_basic_html(string):
     string = convert_to_markdown(string)
     string = f'<style>{readable_css}</style><div class="readable-container">{string}</div>'
@@ -194,7 +207,7 @@ def get_image_cache(path):
 def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">'
     for i, _row in enumerate(history):
-        row = [convert_to_markdown(entry) for entry in _row]
+        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
         if row[0]: # don't display empty user messages
             output += f"""
@@ -230,7 +243,7 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
     img_me = f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">' if Path("cache/pfp_me.png").exists() else ''
     for i, _row in enumerate(history):
-        row = [convert_to_markdown(entry) for entry in _row]
+        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
         if row[0]: # don't display empty user messages
             output += f"""
@@ -273,7 +286,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
     output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
     for i, _row in enumerate(history):
-        row = [convert_to_markdown(entry) for entry in _row]
+        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
         if row[0]: # don't display empty user messages
             output += f"""
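
The wrapper relies on a standard-library detail: functools.lru_cache exposes the undecorated function as __wrapped__, so the cache can be bypassed for the message that is still streaming. A minimal sketch of the same pattern, where render is a made-up stand-in for convert_to_markdown:

import functools

@functools.lru_cache(maxsize=4096)
def render(text):
    print(f"rendering {text!r}")  # side effect to show when real work happens
    return text.upper()

def render_wrapped(text, use_cache=True):
    # Cache finished messages, skip the cache for the one still being streamed.
    if use_cache:
        return render(text)
    return render.__wrapped__(text)

render_wrapped("hello")                 # rendered and cached
render_wrapped("hello")                 # served from the cache, no print
render_wrapped("hel", use_cache=False)  # partial text: rendered, never cached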

View File

@@ -57,9 +57,10 @@ settings = {
     'stream': True,
     'character': 'Assistant',
     'name1': 'You',
+    'user_bio': '',
     'custom_system_message': '',
     'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
-    'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
+    'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
     'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
     'autoload_model': False,
     'gallery-items_per_page': 50,

View File

@@ -35,7 +35,8 @@ theme = gr.themes.Default(
     border_color_primary='#c5c5d2',
     button_large_padding='6px 12px',
     body_text_color_subdued='#484848',
-    background_fill_secondary='#eaeaea'
+    background_fill_secondary='#eaeaea',
+    background_fill_primary='#fafafa',
 )
 if Path("notification.mp3").exists():
@@ -170,6 +171,7 @@ def list_interface_input_elements():
         'character_menu',
         'history',
         'name1',
+        'user_bio',
         'name2',
         'greeting',
         'context',
@@ -220,7 +222,7 @@ def apply_interface_values(state, use_persistent=False):
 def save_settings(state, preset, extensions_list, show_controls, theme_state):
     output = copy.deepcopy(shared.settings)
-    exclude = ['name2', 'greeting', 'context', 'turn_template']
+    exclude = ['name2', 'greeting', 'context', 'turn_template', 'truncation_length']
     for k in state:
         if k in shared.settings and k not in exclude:
             output[k] = state[k]

View File

@@ -94,19 +94,50 @@ def create_ui():
 def create_chat_settings_ui():
     mu = shared.args.multi_user
-    with gr.Tab('Character'):
+    with gr.Tab('Chat'):
         with gr.Row():
             with gr.Column(scale=8):
-                with gr.Row():
-                    shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
-                    ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
-                    shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
-                    shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
-                shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name')
-                shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
-                shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
-                shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+                with gr.Tab("Character"):
+                    with gr.Row():
+                        shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+                        ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
+                        shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+                        shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+                    shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
+                    shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
+                    shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+                with gr.Tab("User"):
+                    shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
+                    shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'])
+                with gr.Tab('Chat history'):
+                    with gr.Row():
+                        with gr.Column():
+                            shared.gradio['save_chat_history'] = gr.Button(value='Save history')
+                        with gr.Column():
+                            shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
+                with gr.Tab('Upload character'):
+                    with gr.Tab('YAML or JSON'):
+                        with gr.Row():
+                            shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
+                            shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)
+                        shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
+                    with gr.Tab('TavernAI PNG'):
+                        with gr.Row():
+                            with gr.Column():
+                                shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
+                                shared.gradio['tavern_json'] = gr.State()
+                            with gr.Column():
+                                shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
+                                shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)
+                        shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
             with gr.Column(scale=1):
                 shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil', interactive=not mu)
@@ -137,33 +168,6 @@ def create_chat_settings_ui():
             with gr.Column():
                 shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'])
-    with gr.Tab('Chat history'):
-        with gr.Row():
-            with gr.Column():
-                shared.gradio['save_chat_history'] = gr.Button(value='Save history')
-            with gr.Column():
-                shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
-    with gr.Tab('Upload character'):
-        with gr.Tab('YAML or JSON'):
-            with gr.Row():
-                shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
-                shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)
-            shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
-        with gr.Tab('TavernAI PNG'):
-            with gr.Row():
-                with gr.Column():
-                    shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
-                    shared.gradio['tavern_json'] = gr.State()
-                with gr.Column():
-                    shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
-                    shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)
-            shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
 def create_event_handlers():

View File

@@ -90,7 +90,7 @@ def create_ui():
            shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type)
            shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
-           shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers)
+           shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
            shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
            shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17')
            shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
@@ -118,7 +118,7 @@ def create_ui():
            shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
            shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
            shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
-           shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
+           shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
            shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
            shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
            shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')

View File

@@ -54,7 +54,12 @@ instruction_template_str: |-
 chat_template_str: |-
   {%- for message in messages %}
     {%- if message['role'] == 'system' -%}
-      {{- message['content'] + '\n\n' -}}
+      {%- if message['content'] -%}
+        {{- message['content'] + '\n\n' -}}
+      {%- endif -%}
+      {%- if user_bio -%}
+        {{- user_bio + '\n\n' -}}
+      {%- endif -%}
     {%- else -%}
       {%- if message['role'] == 'user' -%}
         {{- name1 + ': ' + message['content'] + '\n'-}}
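
To see what the updated system branch produces, the template can be rendered directly with jinja2; the condensed template string, names, and message contents below are made up for illustration:

from jinja2 import Environment

# The updated system-branch logic from the template above, condensed onto one line.
template = Environment().from_string(
    "{%- for message in messages %}{%- if message['role'] == 'system' -%}"
    "{%- if message['content'] -%}{{- message['content'] + '\n\n' -}}{%- endif -%}"
    "{%- if user_bio -%}{{- user_bio + '\n\n' -}}{%- endif -%}"
    "{%- else -%}{%- if message['role'] == 'user' -%}{{- name1 + ': ' + message['content'] + '\n' -}}"
    "{%- else -%}{{- name2 + ': ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}"
)

print(template.render(
    messages=[
        {'role': 'system', 'content': 'This is a conversation.'},
        {'role': 'user', 'content': 'Hello!'},
        {'role': 'assistant', 'content': 'Hi there.'},
    ],
    name1='You',
    name2='Assistant',
    user_bio='You are a test user.',
))
# Prints:
# This is a conversation.
#
# You are a test user.
#
# You: Hello!
# Assistant: Hi there.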