From 836a868abcca409eb9c7f8539dad1b8942b204c2 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 12:21:28 -0800
Subject: [PATCH 01/15] UI: improve the heading fonts

---
 css/html_instruct_style.css | 19 ++++--------
 css/main.css                | 58 +++++++++++++++++++++++++++++++++----
 2 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index f6ceb932..5591f7d1 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -17,28 +17,19 @@
     line-height: 28px !important;
 }
 
-.dark .chat .message-body p,
-.dark .chat .message-body li,
-.dark .chat .message-body q {
+.dark .chat .message-body :is(p, li, q, h1, h2, h3, h4, h5, h6) {
     color: #d1d5db !important;
 }
 
-.chat .message-body p,
-.chat .message-body ul,
-.chat .message-body ol {
-    margin-top: 1.25em !important;
-    margin-bottom: 1.25em !important;
+.chat .message-body :is(p, ul, ol) {
+    margin: 1.25em 0 !important;
 }
 
-.chat .message-body p:first-child,
-.chat .message-body ul:first-child,
-.chat .message-body ol:first-child {
+.chat .message-body :is(p, ul, ol):first-child {
     margin-top: 0 !important;
 }
 
-.chat .message-body p:last-child,
-.chat .message-body ul:last-child,
-.chat .message-body ol:last-child {
+.chat .message-body :is(p, ul, ol):last-child {
     margin-bottom: 0 !important;
 }
 
diff --git a/css/main.css b/css/main.css
index fef3d3f1..7b3910f9 100644
--- a/css/main.css
+++ b/css/main.css
@@ -433,12 +433,60 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   padding-top: 20px;
 }
 
-.message-body h1,
-.message-body h2,
-.message-body h3,
+.message-body {
+  font-size: 16px;
+}
+
+.message-body :is(h1, h2, h3, h4, h5, h6) {
+  color: black !important;
+}
+
+.dark .message-body :is(h1, h2, h3, h4, h5, h6) {
+  color: white !important;
+}
+
+.message-body h1 {
+  font-weight: 800;
+  font-size: 2.25em;
+  margin-top: 0;
+  margin-bottom: 0.8888889em;
+  line-height: 1.1111111;
+}
+
+.message-body h2 {
+  font-weight: 700;
+  font-size: 1.5em;
+  margin-top: 2em;
+  margin-bottom: 1em;
+  line-height: 1.3333333;
+}
+
+.message-body h3 {
+  font-weight: 600;
+  font-size: 1.25em;
+  margin-top: 0;
+  margin-bottom: 0.6em;
+  line-height: 1.6;
+}
+
 .message-body h4 {
-  color: var(--body-text-color);
-  margin: 20px 0 10px;
+  font-weight: 600;
+  font-size: 1em;
+  margin-top: 0;
+  margin-bottom: 0.5em;
+  line-height: 1.5;
+}
+
+.message-body h5 {
+  font-weight: normal;
+  font-size: 1em;
+  margin: 0;
+}
+
+.message-body h6 {
+  font-weight: normal;
+  font-size: 1em;
+  margin: 0;
 }
 
 .dark .message q {

From 24a4c98d42a60283df1bc7e583604be222de4626 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 12:23:03 -0800
Subject: [PATCH 02/15] UI: improve the style of links in messages

---
 css/main.css | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/css/main.css b/css/main.css
index 7b3910f9..637dbae5 100644
--- a/css/main.css
+++ b/css/main.css
@@ -501,6 +501,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   list-style-position: outside;
 }
 
+.message-body a {
+  font-weight: 500;
+}
+
 .chat .message-body ul, .chat .message-body ol {
   padding-inline-start: 2em;
 }

From c8ddb86c22e8c8a2b56d161e4acf311c79fb1c17 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 12:23:21 -0800
Subject: [PATCH 03/15] UI: improve some light mode colors

---
 css/html_instruct_style.css | 2 +-
 css/main.css                | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index 5591f7d1..14fd919d 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -51,13 +51,13 @@
 }
 
 .chat .user-message {
+    background: #f4f4f4;
     padding: 1.5rem 1rem;
     border-radius: 0;
     border-bottom-right-radius: 0;
 }
 
 .chat .assistant-message {
-    background: #f4f4f4;
     padding: 1.5rem 1rem;
     border-radius: 0;
     border: 0;
 }
diff --git a/css/main.css b/css/main.css
index 637dbae5..ec118911 100644
--- a/css/main.css
+++ b/css/main.css
@@ -437,10 +437,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   font-size: 16px;
 }
 
-.message-body :is(h1, h2, h3, h4, h5, h6) {
-  color: black !important;
-}
-
 .dark .message-body :is(h1, h2, h3, h4, h5, h6) {
   color: white !important;
 }

From e2fb86e5df464b671984762e2445108b15c9145c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:42:17 -0800
Subject: [PATCH 04/15] UI: further improve the style of lists and headings

---
 css/html_instruct_style.css |  5 ----
 css/main.css                | 30 ++++++++++-------------
 modules/html_generator.py   | 49 +++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index 14fd919d..7f74bf88 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -33,11 +33,6 @@
     margin-bottom: 0 !important;
 }
 
-.chat .message-body li {
-    margin-top: 1.25em !important;
-    margin-bottom: 1.25em !important;
-}
-
 .user-message, .assistant-message {
     font-family: Inter, Helvetica, Arial, sans-serif;
 }
diff --git a/css/main.css b/css/main.css
index ec118911..2ea7b960 100644
--- a/css/main.css
+++ b/css/main.css
@@ -460,7 +460,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .message-body h3 {
   font-weight: 600;
   font-size: 1.25em;
-  margin-top: 0;
+  margin-top: 1.6em;
   margin-bottom: 0.6em;
   line-height: 1.6;
 }
@@ -468,7 +468,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .message-body h4 {
   font-weight: 600;
   font-size: 1em;
-  margin-top: 0;
+  margin-top: 1.5em;
   margin-bottom: 0.5em;
   line-height: 1.5;
 }
@@ -495,6 +495,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 
 .message-body li {
   list-style-position: outside;
+  margin-top: 0.5em !important;
+  margin-bottom: 0.5em !important;
+}
+
+.message-body ul.long-list li,
+.message-body ol.long-list li {
+  margin-top: 1.25em !important;
+  margin-bottom: 1.25em !important;
 }
 
 .message-body a {
@@ -505,23 +513,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   padding-inline-start: 2em;
 }
 
-.chat .message-body li:not(:last-child) {
-  margin-top: 0;
-  margin-bottom: 2px;
-}
-
-.chat .message-body li:last-child {
-  margin-bottom: 0 !important;
-}
-
 .message-body li > p {
   display: inline !important;
 }
 
-.message-body ul, .message-body ol {
-  font-size: 15px !important;
-}
-
 .message-body ul {
   list-style-type: disc !important;
 }
@@ -834,8 +829,9 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   padding-bottom: 80px !important;
 }
 
-.chat ol, .chat ul {
-  margin-top: 6px !important;
+.message-body ol, .message-body ul {
+  margin-top: 0 !important;
+  margin-bottom: 1.25em !important;
 }
 
 /* ----------------------------------------------
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 01b20866..57eac1b1 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -69,6 +69,52 @@ def replace_blockquote(m):
     return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
 
 
+def add_long_list_class(html):
+    '''
+    Adds a long-list class to <ul> or <ol> containing long <li> items
+    '''
+    def is_within_block(start_idx, end_idx, block_matches):
+        return any(start < start_idx < end or start < end_idx < end for start, end in block_matches)
+
+    # Find all <pre>...</pre> and <code>...</code> blocks
+    pre_blocks = [(m.start(), m.end()) for m in re.finditer(r'<pre.*?>.*?</pre>', html, re.DOTALL)]
+    code_blocks = [(m.start(), m.end()) for m in re.finditer(r'<code.*?>.*?</code>', html, re.DOTALL)]
+
+    # Patterns for <ul>/<ol> lists and their <li> items
+    list_pattern = re.compile(r'(<[uo]l[^>]*>)(.*?)(</[uo]l>)', re.DOTALL)
+    li_pattern = re.compile(r'<li[^>]*>(.*?)</li>', re.DOTALL)
+
+    def process_list(match):
+        start_idx, end_idx = match.span()
+        if is_within_block(start_idx, end_idx, pre_blocks) or is_within_block(start_idx, end_idx, code_blocks):
+            return match.group(0)  # Leave the list unchanged if it is inside a <pre> or <code> block
+
+        opening_tag = match.group(1)
+        list_content = match.group(2)
+        closing_tag = match.group(3)
+
+        # Find all list items within this list
+        li_matches = li_pattern.finditer(list_content)
+        has_long_item = any(len(li_match.group(1).strip()) > 128 for li_match in li_matches)
+
+        if has_long_item:
+            # Add class="long-list" to the opening tag if it doesn't already have a class
+            if 'class=' not in opening_tag:
+                opening_tag = opening_tag[:-1] + ' class="long-list">'
+            else:
+                # If there's already a class, append long-list to it
+                opening_tag = re.sub(r'class="([^"]*)"', r'class="\1 long-list"', opening_tag)
+
+        return opening_tag + list_content + closing_tag
+
+    # Process HTML and replace list blocks
+    return list_pattern.sub(process_list, html)
+
+
 @functools.lru_cache(maxsize=None)
 def convert_to_markdown(string):
@@ -168,6 +214,9 @@ def convert_to_markdown(string):
     pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
     html_output = pattern.sub(lambda x: html.unescape(x.group()), html_output)
 
+    # Add "long-list" class to <ul> or <ol> containing a long <li> item
+    html_output = add_long_list_class(html_output)
+
     return html_output
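
Illustration of the rule this patch introduces, as a simplified sketch rather than the patch's exact code: it assumes the list tag has no existing class attribute and omits the <pre>/<code> exclusion, and the 128-character threshold shown here is raised to 160 and then 224 by later patches in this series.

import re

# A list counts as "long" when any of its items exceeds the threshold.
LIST_RE = re.compile(r'(<[uo]l[^>]*>)(.*?)(</[uo]l>)', re.DOTALL)
LI_RE = re.compile(r'<li[^>]*>(.*?)</li>', re.DOTALL)


def tag_long_lists(html, threshold=128):
    def tag(match):
        opening, body, closing = match.groups()
        if any(len(item.strip()) > threshold for item in LI_RE.findall(body)):
            opening = opening[:-1] + ' class="long-list">'
        return opening + body + closing

    return LIST_RE.sub(tag, html)


short_list = '<ul><li>one</li><li>two</li></ul>'
long_list = '<ol><li>' + 'word ' * 60 + '</li></ol>'
print(tag_long_lists(short_list))  # unchanged: keeps the tighter 0.5em item spacing
print(tag_long_lists(long_list))   # becomes <ol class="long-list">: gets the 1.25em spacing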
From 2acec386fc6647782e30cbddbc298652a2224b3c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 14:08:56 -0800
Subject: [PATCH 05/15] UI: improve the streaming cursor
---
modules/chat.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/chat.py b/modules/chat.py
index 92808fb7..2638c794 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -353,14 +353,14 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
# Extract the reply
if state['mode'] in ['chat', 'chat-instruct']:
- visible_reply = re.sub("(<USER>|<user>|{{user}})", state['name1'], reply + '❚')
+ visible_reply = re.sub("(<USER>|<user>|{{user}})", state['name1'], reply + '▍')
else:
- visible_reply = reply + '❚'
+ visible_reply = reply + '▍'
visible_reply = html.escape(visible_reply)
if shared.stop_everything:
- if output['visible'][-1][1].endswith('❚'):
+ if output['visible'][-1][1].endswith('▍'):
output['visible'][-1][1] = output['visible'][-1][1][:-1]
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
@@ -378,7 +378,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
if is_stream:
yield output
- if output['visible'][-1][1].endswith('❚'):
+ if output['visible'][-1][1].endswith('▍'):
output['visible'][-1][1] = output['visible'][-1][1][:-1]
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
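
The pattern being adjusted here, sketched minimally (an assumed example, not the webui's actual streaming loop): a cursor glyph is appended to the visible text on every intermediate update and stripped from the final one.

CURSOR = '\u258d'  # '▍', the cursor character used after this patch


def stream_with_cursor(chunks):
    visible = ''
    for chunk in chunks:
        visible += chunk
        yield visible + CURSOR  # intermediate updates end with the cursor

    yield visible  # the final update has the cursor stripped


for update in stream_with_cursor(['Hel', 'lo', ' world']):
    print(repr(update))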
From 89888bef56018099b95e04317c9393f1952c5aa7 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 14:38:36 -0800
Subject: [PATCH 06/15] UI: increase the threshold for a <li> to be considered
 long
---
modules/html_generator.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 57eac1b1..0d831af2 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -99,7 +99,7 @@ def add_long_list_class(html):
# Find all list items within this list
li_matches = li_pattern.finditer(list_content)
- has_long_item = any(len(li_match.group(1).strip()) > 128 for li_match in li_matches)
+ has_long_item = any(len(li_match.group(1).strip()) > 160 for li_match in li_matches)
if has_long_item:
# Add class="long-list" to the opening tag if it doesn't already have a class
From ee3a533e5cc43612d887bdf275a786edd581eec0 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 16:11:29 -0800
Subject: [PATCH 07/15] UI: improve the message width in instruct mode
---
css/html_instruct_style.css | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index 7f74bf88..dcc19c29 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -68,7 +68,7 @@
.chat .user-message .text,
.chat .assistant-message .text {
- max-width: 40.25rem;
+ max-width: 645px;
margin-left: auto;
margin-right: auto;
}
From 0490ee620a1d479d304cd24b3d6d6c0d44032f7b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Dec 2024 16:51:34 -0800
Subject: [PATCH 08/15] UI: increase the threshold for a <li> to be considered
 long (some more)
---
modules/html_generator.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 0d831af2..f07b6e75 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -99,7 +99,7 @@ def add_long_list_class(html):
# Find all list items within this list
li_matches = li_pattern.finditer(list_content)
- has_long_item = any(len(li_match.group(1).strip()) > 160 for li_match in li_matches)
+ has_long_item = any(len(li_match.group(1).strip()) > 224 for li_match in li_matches)
if has_long_item:
# Add class="long-list" to the opening tag if it doesn't already have a class
From 39a5c9a49c35875d14f35cca54411c6864fe66ae Mon Sep 17 00:00:00 2001
From: oobabooga
Date: Sun, 29 Dec 2024 11:16:17 -0300
Subject: [PATCH 09/15] UI organization (#6618)
---
css/main.css | 2 +-
modules/loaders.py | 1 -
modules/ui_model_menu.py | 71 ++++++++++------------
modules/ui_parameters.py | 124 +++++++++++++++++++--------------------
4 files changed, 94 insertions(+), 104 deletions(-)
diff --git a/css/main.css b/css/main.css
index 2ea7b960..314b36e0 100644
--- a/css/main.css
+++ b/css/main.css
@@ -38,7 +38,7 @@ div.svelte-iyf88w {
/* "info" messages without a title above */
.block > .svelte-e8n7p6:not(:only-of-type, #chat-mode *) {
- margin-bottom: 2px;
+ margin-bottom: 0;
}
.py-6 {
diff --git a/modules/loaders.py b/modules/loaders.py
index 4cb7e349..1cfdb31b 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -26,7 +26,6 @@ loaders_and_params = OrderedDict({
'compress_pos_emb',
'disable_exllama',
'disable_exllamav2',
- 'transformers_info',
],
'llama.cpp': [
'n_ctx',
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 189bedfd..c4bb8f01 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -80,59 +80,52 @@ def create_ui():
with gr.Blocks():
with gr.Row():
with gr.Column():
- with gr.Blocks():
- for i in range(len(total_mem)):
- shared.gradio[f'gpu_memory_{i}'] = gr.Slider(label=f"gpu-memory in MiB for device :{i}", maximum=total_mem[i], value=default_gpu_mem[i])
+ for i in range(len(total_mem)):
+ shared.gradio[f'gpu_memory_{i}'] = gr.Slider(label=f"gpu-memory in MiB for device :{i}", maximum=total_mem[i], value=default_gpu_mem[i])
- shared.gradio['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem)
-
- with gr.Blocks():
- shared.gradio['transformers_info'] = gr.Markdown('load-in-4bit params:')
- shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype)
- shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type)
-
- shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
- shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
- shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
- shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
- shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
+ shared.gradio['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem)
+ shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
+ shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
+ shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None")
shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
+ shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
+ shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
+ shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
+ shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
- shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. Try lowering this if you run out of memory while loading the model.')
- with gr.Blocks():
- shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
- shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
- shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
-
- shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.')
+ shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
+ shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
+ shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
+ shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
+ shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
+ shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
+ shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
with gr.Column():
+ shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
- shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
- shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
- shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
- shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
- shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
- shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
+ shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
- shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
+ shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
+ shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.')
+ shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
+ shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
+ shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.')
shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.')
shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.')
- shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
- shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
- shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
- shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
+ shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
+ shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable Tensor Parallelism (TP).')
@@ -141,16 +134,14 @@ def create_ui():
shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa)
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
- shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
- with gr.Blocks():
- shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
- shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
- shared.gradio['logits_all'] = gr.Checkbox(label="logits_all", value=shared.args.logits_all, info='Needs to be set for perplexity evaluation to work with this loader. Otherwise, ignore it, as it makes prompt processing slower.')
-
+ shared.gradio['logits_all'] = gr.Checkbox(label="logits_all", value=shared.args.logits_all, info='Needs to be set for perplexity evaluation to work with this loader. Otherwise, ignore it, as it makes prompt processing slower.')
shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel for GPTQ models.')
shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
- shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
+ shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
+ shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")
+ shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
+ shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `max_seq_len` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
with gr.Column():
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index a2665e0d..727a1528 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -27,80 +27,80 @@ def create_ui(default_preset):
with gr.Column():
with gr.Row():
with gr.Column():
- shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens'])
+ gr.Markdown('## Curve shape')
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature')
- shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
- shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
- shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
- shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
- shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
- shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
- shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
- shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
- shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
-
- with gr.Blocks():
- shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
- shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
- shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
- shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
-
- with gr.Blocks():
- shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
- shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
-
- gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)")
-
- with gr.Column():
- with gr.Group():
- shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
- shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
- shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
- shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
- shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
-
- shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
- shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
- shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', lines=3, elem_classes=['add_scrollbar'])
- shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
- shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
- shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
- shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
- shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
- shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
- shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
-
- with gr.Column():
- with gr.Row() as shared.gradio['grammar_file_row']:
- shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
- ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
- shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
-
- shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
-
- with gr.Row():
- with gr.Column():
- shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
- shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
- shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
- shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
- shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
- shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
- shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.')
+ shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
+ shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
+
+ gr.Markdown('## Curve cutoff')
+ shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
+ shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
+ shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
+ shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
+ shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
+ shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
+ shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
+ shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
+ shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
+ shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
+
+ gr.Markdown('## Repetition suppression')
+ shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
+ shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
+ shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
+ shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
+ shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
+ shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
+ shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
+ shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
+ shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
with gr.Column():
- shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
+ gr.Markdown('## Alternative sampling methods')
+ shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
+ shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
+ shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
+ shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
+ shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
+
+ gr.Markdown('## Other options')
+ shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
- shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
+
+ with gr.Column():
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
+ shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
+ shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
+ shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
+ shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
+ shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
+ with gr.Column():
+ shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
+ shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
+
+ shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
+ shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
+ shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
+ shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
+ shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
+ with gr.Row() as shared.gradio['grammar_file_row']:
+ shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
+ shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
+
+ shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
+
ui_chat.create_chat_settings_ui()
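
The reorganization above replaces nested gr.Blocks()/gr.Group() containers with plain columns whose sections are labeled by gr.Markdown headings. A minimal standalone sketch of that layout pattern (hypothetical sliders and placeholder values, assuming Gradio is installed; this is not the webui's actual create_ui):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown('## Curve shape')
            gr.Slider(0.01, 5, value=1.0, step=0.01, label='temperature')

            gr.Markdown('## Curve cutoff')
            gr.Slider(0.0, 1.0, value=0.05, step=0.01, label='min_p')
            gr.Slider(0.0, 1.0, value=1.0, step=0.01, label='top_p')
        with gr.Column():
            gr.Markdown('## Repetition suppression')
            gr.Slider(1.0, 1.5, value=1.0, step=0.01, label='repetition_penalty')

if __name__ == '__main__':
    demo.launch()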
From 4ce9d13dbec04230b4a56b905e8e96ae9f4f4d8e Mon Sep 17 00:00:00 2001
From: oobabooga
Date: Sun, 29 Dec 2024 12:25:26 -0300
Subject: [PATCH 10/15] Preset cleanup (#6619)
---
presets/Big O.yaml | 6 ------
presets/Creative.yaml | 2 ++
presets/{Debug-deterministic.yaml => Deterministic.yaml} | 0
presets/Divine Intellect.yaml | 4 ----
presets/Instruct.yaml | 1 +
presets/LLaMA-Precise.yaml | 4 ----
presets/Midnight Enigma.yaml | 4 ----
presets/Shortwave.yaml | 4 ----
presets/Yara.yaml | 4 ----
presets/simple-1.yaml | 4 ----
10 files changed, 3 insertions(+), 30 deletions(-)
delete mode 100644 presets/Big O.yaml
create mode 100644 presets/Creative.yaml
rename presets/{Debug-deterministic.yaml => Deterministic.yaml} (100%)
delete mode 100644 presets/Divine Intellect.yaml
create mode 100644 presets/Instruct.yaml
delete mode 100644 presets/LLaMA-Precise.yaml
delete mode 100644 presets/Midnight Enigma.yaml
delete mode 100644 presets/Shortwave.yaml
delete mode 100644 presets/Yara.yaml
delete mode 100644 presets/simple-1.yaml
diff --git a/presets/Big O.yaml b/presets/Big O.yaml
deleted file mode 100644
index 2ab18268..00000000
--- a/presets/Big O.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-temperature: 0.87
-top_p: 0.99
-typical_p: 0.68
-tfs: 0.68
-repetition_penalty: 1.01
-top_k: 85
diff --git a/presets/Creative.yaml b/presets/Creative.yaml
new file mode 100644
index 00000000..3ed04190
--- /dev/null
+++ b/presets/Creative.yaml
@@ -0,0 +1,2 @@
+min_p: 0.02
+xtc_probability: 0.5
diff --git a/presets/Debug-deterministic.yaml b/presets/Deterministic.yaml
similarity index 100%
rename from presets/Debug-deterministic.yaml
rename to presets/Deterministic.yaml
diff --git a/presets/Divine Intellect.yaml b/presets/Divine Intellect.yaml
deleted file mode 100644
index ac750e40..00000000
--- a/presets/Divine Intellect.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-temperature: 1.31
-top_p: 0.14
-repetition_penalty: 1.17
-top_k: 49
diff --git a/presets/Instruct.yaml b/presets/Instruct.yaml
new file mode 100644
index 00000000..142fcd82
--- /dev/null
+++ b/presets/Instruct.yaml
@@ -0,0 +1 @@
+min_p: 0.2
diff --git a/presets/LLaMA-Precise.yaml b/presets/LLaMA-Precise.yaml
deleted file mode 100644
index c5f9cae2..00000000
--- a/presets/LLaMA-Precise.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-temperature: 0.7
-top_p: 0.1
-repetition_penalty: 1.18
-top_k: 40
diff --git a/presets/Midnight Enigma.yaml b/presets/Midnight Enigma.yaml
deleted file mode 100644
index 0bd1763c..00000000
--- a/presets/Midnight Enigma.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-temperature: 0.98
-top_p: 0.37
-repetition_penalty: 1.18
-top_k: 100
diff --git a/presets/Shortwave.yaml b/presets/Shortwave.yaml
deleted file mode 100644
index a2528abd..00000000
--- a/presets/Shortwave.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-temperature: 1.53
-top_p: 0.64
-repetition_penalty: 1.07
-top_k: 33
diff --git a/presets/Yara.yaml b/presets/Yara.yaml
deleted file mode 100644
index 87bb019e..00000000
--- a/presets/Yara.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-temperature: 0.82
-top_p: 0.21
-repetition_penalty: 1.19
-top_k: 72
diff --git a/presets/simple-1.yaml b/presets/simple-1.yaml
deleted file mode 100644
index 30a10659..00000000
--- a/presets/simple-1.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-temperature: 0.7
-top_p: 0.9
-repetition_penalty: 1.15
-top_k: 20
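
The new presets are deliberately tiny: each YAML file only overrides a couple of sampler values. A minimal sketch of layering such a file over assumed defaults (illustrative only; the default values below are assumptions, and this is not the webui's own preset loader):

import yaml

# Assumed baseline values for illustration; only the keys present in the
# preset file get overridden.
DEFAULT_PARAMS = {
    'temperature': 1.0,
    'min_p': 0.0,
    'top_p': 1.0,
    'xtc_probability': 0.0,
}


def load_preset(path, defaults=DEFAULT_PARAMS):
    with open(path) as f:
        overrides = yaml.safe_load(f) or {}
    params = dict(defaults)
    params.update(overrides)
    return params


# presets/Creative.yaml from this patch contains only:
#   min_p: 0.02
#   xtc_probability: 0.5
print(load_preset('presets/Creative.yaml'))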
From 292cd489e9916104b3f965c8da3ccb9f36253b90 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 30 Dec 2024 04:31:10 -0800
Subject: [PATCH 11/15] Bump ExLlamaV2 to 0.2.7
---
requirements.txt | 10 +++++-----
requirements_amd.txt | 6 +++---
requirements_amd_noavx2.txt | 6 +++---
requirements_apple_intel.txt | 2 +-
requirements_apple_silicon.txt | 2 +-
requirements_noavx2.txt | 10 +++++-----
6 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 24c92391..a04cb04f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -50,11 +50,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements_amd.txt b/requirements_amd.txt
index b7093d50..e5bec4bf 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -39,6 +39,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp
# AMD wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.5+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.5+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 88682aea..d408b4cf 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -37,6 +37,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 6588278d..da0423a2 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -33,4 +33,4 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 1fc9795b..dd1933bd 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -35,4 +35,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 9ad138d8..db21c0fc 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -50,11 +50,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
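Note on the wheel entries above: everything after the ';' in each line is a PEP 508 environment marker, so pip only installs the wheel whose marker matches the running OS, architecture, and Python version. Below is a minimal sketch, separate from the patch, of how such a marker evaluates; it assumes the `packaging` library is available and reuses a marker string copied from the exllamav2 entries.

from packaging.markers import Marker

# Marker string copied from one of the exllamav2 entries above.
marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')

# Evaluate against the current interpreter and OS...
print(marker.evaluate())

# ...or against a hypothetical environment (e.g. Windows on Python 3.10),
# which is roughly the check pip performs when deciding which wheel line applies.
print(marker.evaluate({
    "platform_system": "Windows",
    "platform_machine": "AMD64",
    "python_version": "3.10",
}))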
From cca4ac56fa2e093af99552a4a9ce6aece21bde41 Mon Sep 17 00:00:00 2001
From: mamei16
Date: Mon, 30 Dec 2024 13:34:19 +0100
Subject: [PATCH 12/15] Fix interface loading with dark theme even when
'dark_theme' is set to false (#6614)
---
server.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/server.py b/server.py
index d6069d5e..31e1c4c6 100644
--- a/server.py
+++ b/server.py
@@ -154,6 +154,9 @@ def create_interface():
if ({str(shared.settings['dark_theme']).lower()}) {{
document.getElementsByTagName('body')[0].classList.add('dark');
}}
+ else {{
+ document.getElementsByTagName('body')[0].classList.remove('dark');
+ }}
{js}
{ui.show_controls_js}
toggle_controls(x);
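For context on the hunk above: the JS block sits inside an f-string in create_interface(), so `{str(shared.settings['dark_theme']).lower()}` becomes the literal `true` or `false` in the generated script. The fix adds an explicit else branch so the 'dark' class is removed when the setting is false, rather than only ever being added when it is true. A minimal standalone sketch of the same pattern, with assumed variable names rather than the actual server.py code:

dark_theme = False  # stands in for shared.settings['dark_theme']

# str(False).lower() -> "false", so the if-branch is skipped and the else
# branch strips the class, matching the behavior added by the patch.
startup_js = f"""
if ({str(dark_theme).lower()}) {{
    document.getElementsByTagName('body')[0].classList.add('dark');
}}
else {{
    document.getElementsByTagName('body')[0].classList.remove('dark');
}}
"""
print(startup_js)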
From d24b83132b207b5b95f7ba3754e6b3f9b99e1f84 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 30 Dec 2024 09:35:20 -0300
Subject: [PATCH 13/15] Bump jinja2 from 3.1.4 to 3.1.5 (#6601)
---
requirements.txt | 2 +-
requirements_amd.txt | 2 +-
requirements_amd_noavx2.txt | 2 +-
requirements_apple_intel.txt | 2 +-
requirements_apple_silicon.txt | 2 +-
requirements_cpu_only.txt | 2 +-
requirements_cpu_only_noavx2.txt | 2 +-
requirements_noavx2.txt | 2 +-
requirements_nowheels.txt | 2 +-
9 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index a04cb04f..8b36f54f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_amd.txt b/requirements_amd.txt
index e5bec4bf..1b85b20f 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index d408b4cf..1bafc141 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index da0423a2..345368b7 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index dd1933bd..a3c3055b 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 53fedd7e..a4b7882c 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 9f52b172..878aea06 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index db21c0fc..1a345611 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -5,7 +5,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index e2daebd9..95fe3add 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -4,7 +4,7 @@ datasets
einops
fastapi==0.112.4
gradio==4.37.*
-jinja2==3.1.4
+jinja2==3.1.5
markdown
numba==0.59.*
numpy==1.26.*
From e953af85cd45b3c24d4fe953a9ed68181d175ed8 Mon Sep 17 00:00:00 2001
From: mamei16
Date: Tue, 31 Dec 2024 05:04:02 +0100
Subject: [PATCH 14/15] Fix newlines in the markdown renderer (#6599)
---------
Co-authored-by: oobabooga
---
modules/html_generator.py | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index f07b6e75..08147bb7 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -150,7 +150,6 @@ def convert_to_markdown(string):
result = ''
is_code = False
is_latex = False
- previous_line_empty = True
for line in string.split('\n'):
stripped_line = line.strip()
@@ -168,20 +167,16 @@ def convert_to_markdown(string):
elif stripped_line.endswith('\\\\]'):
is_latex = False
- # Preserve indentation for lists and code blocks
- if stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line):
- result += line + '\n'
- previous_line_empty = False
- elif is_code or is_latex or line.startswith('|'):
- result += line + '\n'
- previous_line_empty = False
- else:
- if previous_line_empty:
- result += line.strip() + '\n'
- else:
- result += line.strip() + '\n\n'
+ result += line
- previous_line_empty = stripped_line == ''
+ # Don't add an extra \n for code, LaTeX, or tables
+ if is_code or is_latex or line.startswith('|'):
+ result += '\n'
+ # Also don't add an extra \n for lists
+ elif stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line):
+ result += '\n'
+ else:
+ result += '\n\n'
result = result.strip()
if is_code:
@@ -200,7 +195,7 @@ def convert_to_markdown(string):
result = re.sub(list_item_pattern, r'\g<1> ' + delete_str, result)
# Convert to HTML using markdown
- html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2)
+ html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'])
# Remove the delete string from the HTML output
pos = html_output.rfind(delete_str)
@@ -208,7 +203,7 @@ def convert_to_markdown(string):
html_output = html_output[:pos] + html_output[pos + len(delete_str):]
else:
# Convert to HTML using markdown
- html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2)
+ html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'])
# Unescape code blocks
pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
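The rewritten loop keeps each line verbatim and only decides how many newlines follow it: a single '\n' for code, LaTeX, tables, and list items, and a blank line ('\n\n') otherwise. The blank line matters because Python-Markdown only starts a new paragraph at a blank line; the later hunks also drop `tab_length=2`, falling back to the library's default tab width of 4. A small illustration, separate from the patch, using the same `markdown` calls as the module:

import markdown

single = "first line\nsecond line"     # one newline: same paragraph
double = "first line\n\nsecond line"   # blank line: separate paragraphs

# One <p> containing both lines.
print(markdown.markdown(single, extensions=['fenced_code', 'tables']))

# Two separate <p> elements.
print(markdown.markdown(double, extensions=['fenced_code', 'tables']))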
From 64853f85095f8671a28298b9563896e619d531da Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 31 Dec 2024 14:43:22 -0800
Subject: [PATCH 15/15] Reapply a necessary change that I removed from #6599
(thanks @mamei16!)
---
modules/html_generator.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 08147bb7..40a56731 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -176,7 +176,7 @@ def convert_to_markdown(string):
elif stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line):
result += '\n'
else:
- result += '\n\n'
+ result += ' \n'
result = result.strip()
if is_code:
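The reapplied change replaces the blank-line separator on ordinary lines with a space-terminated newline, so consecutive lines of chat output stay within one paragraph instead of each becoming its own <p> block. A minimal sketch of the contrast in Python-Markdown, separate from the patch; the two-trailing-spaces form below is the standard Markdown hard-break syntax and is used here only for illustration:

import markdown

hard_break = "first line  \nsecond line"    # two trailing spaces: <br /> inside one <p>
paragraphs = "first line\n\nsecond line"    # blank line: two separate <p> elements

print(markdown.markdown(hard_break))
print(markdown.markdown(paragraphs))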