From 2f71515cb0624f9686b488935f482d423a60b274 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 27 Jun 2024 17:07:04 -0700 Subject: [PATCH 01/91] Make dependabot target the dev branch --- .github/dependabot.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 91abb11f..93aaf445 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,5 +7,6 @@ version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values directory: "/" # Location of package manifests + target-branch: "dev" schedule: interval: "weekly" From aa7c14a4633cf5e3b612deb4a718161bd1c23cc2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 19 Jul 2024 21:42:40 -0700 Subject: [PATCH 02/91] Use chat-instruct mode by default --- modules/shared.py | 2 +- settings-template.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index d96e3156..09cbedcf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -33,7 +33,7 @@ settings = { 'dark_theme': True, 'show_controls': True, 'start_with': '', - 'mode': 'chat', + 'mode': 'chat-instruct', 'chat_style': 'cai-chat', 'prompt-default': 'QA', 'prompt-notebook': 'QA', diff --git a/settings-template.yaml b/settings-template.yaml index f09c845e..8d6e8dd8 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -1,7 +1,7 @@ dark_theme: true show_controls: true start_with: '' -mode: chat +mode: chat-instruct chat_style: cai-chat prompt-default: QA prompt-notebook: QA From 1c3671699c83424fb48adb7929d007f6e9056eaa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 20 Jul 2024 18:20:26 -0300 Subject: [PATCH 03/91] Bump hqq from 0.1.7.post3 to 0.1.8 (#6238) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index ff2f3161..2cd4328b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_amd.txt b/requirements_amd.txt index 43412538..0d023242 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 34cb4599..85e40814 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 1b170c27..a2381124 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index d557eae1..ef1de35a 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -3,7 +3,7 @@ 
colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 5aed6167..ada3242c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 3937b002..8302ce06 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 15b45a05..fb16ce81 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -6,7 +6,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 14e3aa88..7aa3971a 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.7.post3 +hqq==0.1.8 jinja2==3.1.4 lm_eval==0.3.0 markdown From a9a6d72d8c0bbd0743431b18933318e37fd45a85 Mon Sep 17 00:00:00 2001 From: Vhallo Date: Sat, 20 Jul 2024 23:57:09 +0200 Subject: [PATCH 04/91] Use gr.Number for RoPE scaling parameters (#6233) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- modules/ui_model_menu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 7a85020f..93acad4e 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -104,9 +104,9 @@ def create_ui(): shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len) with gr.Blocks(): - shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.05, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) - shared.gradio['rope_freq_base'] = gr.Slider(label='rope_freq_base', minimum=0, maximum=20000000, step=1000, info='If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63)', value=shared.args.rope_freq_base) - shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=0.1, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.', value=shared.args.compress_pos_emb) + shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=0, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. 
Use either this or compress_pos_emb, not both.') + shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.') + shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=0, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.') shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.') From 6ab477f3752be8af35a4498da20cb3775208ee9b Mon Sep 17 00:00:00 2001 From: "FartyPants (FP HAM)" Date: Sat, 20 Jul 2024 18:05:09 -0400 Subject: [PATCH 05/91] training: Added ChatML-format.json format example (#5899) --- training/formats/ChatML-format.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 training/formats/ChatML-format.json diff --git a/training/formats/ChatML-format.json b/training/formats/ChatML-format.json new file mode 100644 index 00000000..a9f8a09a --- /dev/null +++ b/training/formats/ChatML-format.json @@ -0,0 +1,4 @@ +{ + "instruction,output": "<|im_start|>system\n<|im_end|>\n<|im_start|>user\n%instruction%<|im_end|>\n<|im_start|>assistant\n%output%<|im_end|>", + "instruction,input,output": "<|im_start|>system\n<|im_end|>\n<|im_start|>user\n%instruction%: %input%<|im_end|>\n<|im_start|>assistant\n%output%<|im_end|>" +} From a14c510afb36173fef89bdc0394689a6a2923979 Mon Sep 17 00:00:00 2001 From: Alberto Cano <34340962+canoalberto@users.noreply.github.com> Date: Sat, 20 Jul 2024 18:10:39 -0400 Subject: [PATCH 06/91] Customize the subpath for gradio, use with reverse proxy (#5106) --- modules/shared.py | 1 + server.py | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/shared.py b/modules/shared.py index 09cbedcf..20eaff58 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -195,6 +195,7 @@ group.add_argument('--gradio-auth', type=str, help='Set Gradio authentication pa group.add_argument('--gradio-auth-path', type=str, help='Set the Gradio authentication file path. 
The file should contain one or more user:password pairs in the same format as above.', default=None) group.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None) group.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None) +group.add_argument('--subpath', type=str, help='Customize the subpath for gradio, use with reverse proxy') # API group = parser.add_argument_group('API') diff --git a/server.py b/server.py index 7afa954e..f057d233 100644 --- a/server.py +++ b/server.py @@ -169,6 +169,7 @@ def create_interface(): ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, ssl_certfile=shared.args.ssl_certfile, + root_path=shared.args.subpath, allowed_paths=["cache", "css", "extensions", "js"] ) From 79c4d3da3de731628eab79ec5d31b4a4d1a01b67 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 00:01:42 -0300 Subject: [PATCH 07/91] Optimize the UI (#6251) --- modules/chat.py | 205 +++++++++++++++++++++++++++++++++++++- modules/ui.py | 1 + modules/ui_chat.py | 135 +++++++------------------ modules/ui_default.py | 33 +++--- modules/ui_file_saving.py | 123 ++++++++++++++--------- modules/ui_model_menu.py | 43 ++++---- modules/ui_notebook.py | 17 +--- modules/ui_parameters.py | 14 ++- modules/ui_session.py | 19 ++-- modules/utils.py | 9 +- server.py | 5 +- 11 files changed, 394 insertions(+), 210 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 6640776f..a1196daf 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -22,7 +22,8 @@ from modules.logging_colors import logger from modules.text_generation import ( generate_reply, get_encoded_length, - get_max_prompt_length + get_max_prompt_length, + stop_everything_event ) from modules.utils import delete_file, get_available_characters, save_file @@ -421,9 +422,12 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): send_dummy_message(text, state) send_dummy_reply(state['start_with'], state) + history = state['history'] for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)): yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + def remove_last_message(history): if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>': @@ -995,3 +999,202 @@ def my_yaml_output(data): result += " " + line.rstrip(' ') + "\n" return result + + +def handle_replace_last_reply_click(text, state): + history = replace_last_reply(text, state) + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html, ""] + + +def handle_send_dummy_message_click(text, state): + history = send_dummy_message(text, state) + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html, ""] + + +def handle_send_dummy_reply_click(text, state): + history = send_dummy_reply(text, state) + save_history(history, state['unique_id'], 
state['character_menu'], state['mode']) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html, ""] + + +def handle_remove_last_click(state): + last_input, history = remove_last_message(state['history']) + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html, last_input] + + +def handle_stop_click(state): + stop_everything_event() + html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return html + + +def handle_unique_id_select(state): + history = load_history(state['unique_id'], state['character_menu'], state['mode']) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html] + + +def handle_start_new_chat_click(state): + history = start_new_chat(state) + histories = find_all_histories_with_first_prompts(state) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html, gr.update(choices=histories, value=histories[0][1])] + + +def handle_delete_chat_confirm_click(state): + index = str(find_all_histories(state).index(state['unique_id'])) + delete_history(state['unique_id'], state['character_menu'], state['mode']) + history, unique_id = load_history_after_deletion(state, index) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [ + history, + html, + unique_id, + gr.update(visible=False), + gr.update(visible=True), + gr.update(visible=False) + ] + + +def handle_rename_chat_click(): + return [ + gr.update(visible=True, value="My New Chat"), + gr.update(visible=True), + gr.update(visible=True) + ] + + +def handle_rename_chat_confirm(rename_to, state): + rename_history(state['unique_id'], rename_to, state['character_menu'], state['mode']) + histories = find_all_histories_with_first_prompts(state) + + return [ + gr.update(choices=histories, value=rename_to), + gr.update(visible=False), + gr.update(visible=False), + gr.update(visible=False) + ] + + +def handle_upload_chat_history(load_chat_history, state): + history = start_new_chat(state) + history = load_history_json(load_chat_history, history) + histories = find_all_histories_with_first_prompts(state) + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [ + history, + html, + gr.update(choices=histories, value=histories[0][1]) + ] + + +def handle_character_menu_change(state): + name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2']) + + state['name1'] = name1 + state['name2'] = name2 + state['character_picture'] = picture + state['greeting'] = greeting + state['context'] = context + + history = load_latest_history(state) + histories = find_all_histories_with_first_prompts(state) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [ + history, + html, + name1, + name2, + picture, + greeting, + context, + gr.update(choices=histories, 
value=histories[0][1]), + ] + + +def handle_mode_change(state): + history = load_latest_history(state) + histories = find_all_histories_with_first_prompts(state) + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [ + history, + html, + gr.update(visible=state['mode'] != 'instruct'), + gr.update(visible=state['mode'] == 'chat-instruct'), + gr.update(choices=histories, value=histories[0][1]) + ] + + +def handle_save_character_click(name2): + return [ + name2, + gr.update(visible=True) + ] + + +def handle_load_template_click(instruction_template): + output = load_instruction_template(instruction_template) + return [ + output, + "Select template to load..." + ] + + +def handle_save_template_click(instruction_template_str): + contents = generate_instruction_template_yaml(instruction_template_str) + return [ + "My Template.yaml", + "instruction-templates/", + contents, + gr.update(visible=True) + ] + + +def handle_delete_template_click(template): + return [ + f"{template}.yaml", + "instruction-templates/", + gr.update(visible=True) + ] + + +def handle_your_picture_change(picture, state): + upload_your_profile_picture(picture) + html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return html + + +def handle_send_instruction_click(state): + state['mode'] = 'instruct' + state['history'] = {'internal': [], 'visible': []} + + output = generate_chat_prompt("Input", state) + + return output + + +def handle_send_chat_click(state): + output = generate_chat_prompt("", state, _continue=True) + + return output diff --git a/modules/ui.py b/modules/ui.py index d77266ce..48194540 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -184,6 +184,7 @@ def list_interface_input_elements(): 'start_with', 'character_menu', 'history', + 'unique_id', 'name1', 'user_bio', 'name2', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index c6f6ddb0..7085f5cd 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -7,7 +7,6 @@ from PIL import Image from modules import chat, shared, ui, utils from modules.html_generator import chat_html_wrapper -from modules.text_generation import stop_everything_event from modules.utils import gradio inputs = ('Chat input', 'interface_state') @@ -137,7 +136,7 @@ def create_chat_settings_ui(): shared.gradio['tavern_json'] = gr.State() with gr.Column(): shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) - shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False) + shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label='Description', interactive=False, elem_classes=['add_scrollbar']) shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False) @@ -181,169 +180,112 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.replace_last_reply, gradio('textbox', 'interface_state'), gradio('history')).then( - lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None) + chat.handle_replace_last_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) shared.gradio['Send dummy message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then( - lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None) + chat.handle_send_dummy_message_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) shared.gradio['Send dummy reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then( - lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - chat.save_history, gradio('history', 
'unique_id', 'character_menu', 'mode'), None) + chat.handle_send_dummy_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) shared.gradio['Remove last'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None) + chat.handle_remove_last_click, gradio('interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) shared.gradio['Stop'].click( - stop_everything_event, None, None, queue=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')) + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_stop_click, gradio('interface_state'), gradio('display'), show_progress=False) if not shared.args.multi_user: shared.gradio['unique_id'].select( - chat.load_history, gradio('unique_id', 'character_menu', 'mode'), gradio('history')).then( - chat.redraw_html, gradio(reload_arr), gradio('display')) + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_unique_id_select, gradio('interface_state'), gradio('history', 'display'), show_progress=False) shared.gradio['Start new chat'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.start_new_chat, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False) + chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr)) shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr)) shared.gradio['delete_chat-confirm'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x, y: str(chat.find_all_histories(x).index(y)), gradio('interface_state', 'unique_id'), gradio('temporary_text')).then( - chat.delete_history, gradio('unique_id', 'character_menu', 'mode'), None).then( - chat.load_history_after_deletion, gradio('interface_state', 'temporary_text'), gradio('history', 'unique_id'), show_progress=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr)) - - shared.gradio['rename_chat'].click( - lambda: "My New Chat", None, gradio('rename_to')).then( - lambda: [gr.update(visible=True)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) - - shared.gradio['rename_to-cancel'].click( - lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) + chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id') + gradio(clear_arr), show_progress=False) + 
shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) + shared.gradio['rename_to-cancel'].click(lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) shared.gradio['rename_to-confirm'].click( - chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then( - lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then( - lambda x, y: gr.update(choices=chat.find_all_histories_with_first_prompts(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id')) + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) shared.gradio['rename_to'].submit( - chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then( - lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then( - lambda x, y: gr.update(choices=chat.find_all_histories_with_first_prompts(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id')) + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False) shared.gradio['load_chat_history'].upload( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.start_new_chat, gradio('interface_state'), gradio('history')).then( - chat.load_history_json, gradio('load_chat_history', 'history'), gradio('history')).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then( - chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( + chat.handle_upload_chat_history, gradio('load_chat_history', 'interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['character_menu'].change( - chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.load_latest_history, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then( + chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then( None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') - shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 
'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}") - shared.gradio['mode'].change( - lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.load_latest_history, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, gradio(reload_arr), gradio('display')).then( - lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False) + chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then( + None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}") - shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display')) + shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) # Save/delete a character - shared.gradio['save_character'].click( - lambda x: x, gradio('name2'), gradio('save_character_filename')).then( - lambda: gr.update(visible=True), None, gradio('character_saver')) - - shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter')) - - shared.gradio['load_template'].click( - chat.load_instruction_template, gradio('instruction_template'), gradio('instruction_template_str')).then( - lambda: "Select template to load...", None, gradio('instruction_template')) - + shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) + shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'), show_progress=False) + shared.gradio['load_template'].click(chat.handle_load_template_click, gradio('instruction_template'), gradio('instruction_template_str', 'instruction_template'), show_progress=False) shared.gradio['save_template'].click( - lambda: 'My Template.yaml', None, gradio('save_filename')).then( - lambda: 'instruction-templates/', None, gradio('save_root')).then( - chat.generate_instruction_template_yaml, gradio('instruction_template_str'), gradio('save_contents')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['delete_template'].click( - lambda x: f'{x}.yaml', gradio('instruction_template'), gradio('delete_filename')).then( - lambda: 'instruction-templates/', None, gradio('delete_root')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False) + shared.gradio['delete_template'].click(chat.handle_delete_template_click, 
gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( - chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( + chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( - chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( + chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) @@ -351,35 +293,32 @@ def create_event_handlers(): shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['your_picture'].change( - chat.upload_your_profile_picture, gradio('your_picture'), None).then( - partial(chat.redraw_html, reset_cache=True), gradio(reload_arr), gradio('display')) + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_your_picture_change, gradio('your_picture', 'interface_state'), gradio('display'), show_progress=False) shared.gradio['send_instruction_to_default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( - partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then( + chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-default'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send_instruction_to_notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( - partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then( + chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-notebook'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['send_instruction_to_negative_prompt'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( - partial(chat.generate_chat_prompt, 'Input'), 
gradio('interface_state'), gradio('negative_prompt')).then( + chat.handle_send_instruction_click, gradio('interface_state'), gradio('negative_prompt'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') shared.gradio['send-chat-to-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( + chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-default'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send-chat-to-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( + chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-notebook'), show_progress=False).then( None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_default.py b/modules/ui_default.py index e3bfe784..676b7fa5 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -64,38 +64,43 @@ def create_event_handlers(): shared.gradio['Generate-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) - shared.gradio['save_prompt-default'].click( - lambda x: x, gradio('textbox-default'), gradio('save_contents')).then( - lambda: 'prompts/', None, gradio('save_root')).then( - lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['delete_prompt-default'].click( - lambda: 'prompts/', None, gradio('delete_root')).then( - lambda x: x + '.txt', gradio('prompt_menu-default'), gradio('delete_filename')).then( - lambda: gr.update(visible=True), None, 
gradio('file_deleter')) - + shared.gradio['save_prompt-default'].click(handle_save_prompt, gradio('textbox-default'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) + shared.gradio['delete_prompt-default'].click(handle_delete_prompt, gradio('prompt_menu-default'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) shared.gradio['textbox-default'].change(lambda x: f"{count_tokens(x)}", gradio('textbox-default'), gradio('token-counter-default'), show_progress=False) shared.gradio['get_logits-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False) shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False) + + +def handle_save_prompt(text): + return [ + text, + utils.current_time() + ".txt", + "prompts/", + gr.update(visible=True) + ] + + +def handle_delete_prompt(prompt): + return [ + prompt + ".txt", + "prompts/", + gr.update(visible=True) + ] diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 71471217..c1047e70 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -47,57 +47,90 @@ def create_ui(): def create_event_handlers(): - shared.gradio['save_confirm'].click( - lambda x, y, z: utils.save_file(x + y, z), gradio('save_root', 'save_filename', 'save_contents'), None).then( - lambda: gr.update(visible=False), None, gradio('file_saver')) - - shared.gradio['delete_confirm'].click( - lambda x, y: utils.delete_file(x + y), gradio('delete_root', 'delete_filename'), None).then( - lambda: gr.update(visible=False), None, gradio('file_deleter')) - + shared.gradio['save_confirm'].click(handle_save_confirm_click, gradio('save_root', 'save_filename', 'save_contents'), gradio('file_saver'), show_progress=False) + shared.gradio['delete_confirm'].click(handle_delete_confirm_click, gradio('delete_root', 'delete_filename'), gradio('file_deleter'), show_progress=False) shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter')) shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver')) - - shared.gradio['save_character_confirm'].click( - chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then( - lambda: gr.update(visible=False), None, gradio('character_saver')).then( - lambda x: gr.update(choices=utils.get_available_characters(), value=x), gradio('save_character_filename'), gradio('character_menu')) - - shared.gradio['delete_character_confirm'].click( - lambda x: str(utils.get_available_characters().index(x)), gradio('character_menu'), gradio('temporary_text')).then( - chat.delete_character, gradio('character_menu'), None).then( - chat.update_character_menu_after_deletion, gradio('temporary_text'), gradio('character_menu')).then( - lambda: gr.update(visible=False), None, gradio('character_deleter')) - - shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver')) - shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter')) - + shared.gradio['save_character_confirm'].click(handle_save_character_confirm_click, gradio('name2', 'greeting', 'context', 
'character_picture', 'save_character_filename'), gradio('character_menu', 'character_saver'), show_progress=False) + shared.gradio['delete_character_confirm'].click(handle_delete_character_confirm_click, gradio('character_menu'), gradio('character_menu', 'character_deleter'), show_progress=False) + shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'), show_progress=False) + shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False) shared.gradio['save_preset'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - presets.generate_preset_yaml, gradio('interface_state'), gradio('save_preset_contents')).then( - lambda: 'My Preset', None, gradio('save_preset_filename')).then( - lambda: gr.update(visible=True), None, gradio('preset_saver')) + handle_save_preset_click, gradio('interface_state'), gradio('save_preset_contents', 'save_preset_filename', 'preset_saver'), show_progress=False) - shared.gradio['save_preset_confirm'].click( - lambda x, y: utils.save_file(f'presets/{x}.yaml', y), gradio('save_preset_filename', 'save_preset_contents'), None).then( - lambda: gr.update(visible=False), None, gradio('preset_saver')).then( - lambda x: gr.update(choices=utils.get_available_presets(), value=x), gradio('save_preset_filename'), gradio('preset_menu')) + shared.gradio['save_preset_confirm'].click(handle_save_preset_confirm_click, gradio('save_preset_filename', 'save_preset_contents'), gradio('preset_menu', 'preset_saver'), show_progress=False) + shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'), show_progress=False) + shared.gradio['delete_preset'].click(handle_delete_preset_click, gradio('preset_menu'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) + shared.gradio['save_grammar'].click(handle_save_grammar_click, gradio('grammar_string'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) + shared.gradio['delete_grammar'].click(handle_delete_grammar_click, gradio('grammar_file'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) - shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver')) - shared.gradio['delete_preset'].click( - lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then( - lambda: 'presets/', None, gradio('delete_root')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) +def handle_save_confirm_click(root, filename, contents): + utils.save_file(root + filename, contents) + return gr.update(visible=False) - shared.gradio['save_grammar'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: x, gradio('grammar_string'), gradio('save_contents')).then( - lambda: 'grammars/', None, gradio('save_root')).then( - lambda: 'My Fancy Grammar.gbnf', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - shared.gradio['delete_grammar'].click( - lambda x: x, gradio('grammar_file'), gradio('delete_filename')).then( - lambda: 'grammars/', None, gradio('delete_root')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) +def handle_delete_confirm_click(root, filename): + utils.delete_file(root + filename) + return gr.update(visible=False) + + +def 
handle_save_character_confirm_click(name2, greeting, context, character_picture, filename): + chat.save_character(name2, greeting, context, character_picture, filename) + available_characters = utils.get_available_characters() + + return [ + gr.update(choices=available_characters, value=filename), + gr.update(visible=False) + ] + + +def handle_delete_character_confirm_click(character): + index = str(utils.get_available_characters().index(character)) + chat.delete_character(character) + output = chat.update_character_menu_after_deletion(index) + return [output, gr.update(visible=False)] + + +def handle_save_preset_click(state): + contents = presets.generate_preset_yaml(state) + return [ + contents, + "My Preset", + gr.update(visible=True) + ] + + +def handle_save_preset_confirm_click(filename, contents): + utils.save_file(f"presets/{filename}.yaml", contents) + available_presets = utils.get_available_presets() + return [ + gr.update(choices=available_presets, value=filename), + gr.update(visible=False) + ] + + +def handle_delete_preset_click(preset): + return [ + f"{preset}.yaml", + "presets/", + gr.update(visible=True) + ] + + +def handle_save_grammar_click(grammar_string): + return [ + grammar_string, + "My Fancy Grammar.gbnf", + "grammars/", + gr.update(visible=True) + ] + + +def handle_delete_grammar_click(grammar_file): + return [ + grammar_file, + "grammars/", + gr.update(visible=True) + ] diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 93acad4e..c868be96 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -66,7 +66,6 @@ def create_ui(): ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu) shared.gradio['load_model'] = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button', interactive=not mu) shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu) - shared.gradio['reload_model'] = gr.Button("Reload", elem_classes='refresh-button', interactive=not mu) shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu) with gr.Column(): @@ -188,39 +187,24 @@ def create_ui(): def create_event_handlers(): - shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) + shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()), show_progress=False) # In this event handler, the interface state is read and updated # with the model defaults (if any), and then the model is loaded # unless "autoload_model" is unchecked shared.gradio['model_menu'].change( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - apply_model_settings_to_state, gradio('model_menu', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - update_model_parameters, gradio('interface_state'), None).then( + handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state'), show_progress=False).then( load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success( - update_truncation_length, gradio('truncation_length', 
'interface_state'), gradio('truncation_length')).then( - lambda x: x, gradio('loader'), gradio('filter_by_loader')) + handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False) shared.gradio['load_model'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( update_model_parameters, gradio('interface_state'), None).then( partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success( - update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then( - lambda x: x, gradio('loader'), gradio('filter_by_loader')) - - shared.gradio['reload_model'].click( - unload_model, None, None).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - update_model_parameters, gradio('interface_state'), None).then( - partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success( - update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then( - lambda x: x, gradio('loader'), gradio('filter_by_loader')) - - shared.gradio['unload_model'].click( - unload_model, None, None).then( - lambda: "Model unloaded", None, gradio('model_status')) + handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False) + shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), show_progress=False) shared.gradio['save_model_settings'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False) @@ -353,3 +337,20 @@ def update_truncation_length(current_length, state): return state['n_ctx'] return current_length + + +def handle_load_model_event_initial(model, state): + state = apply_model_settings_to_state(model, state) + output = ui.apply_interface_values(state) + update_model_parameters(state) + return output + [state] + + +def handle_load_model_event_final(truncation_length, loader, state): + truncation_length = update_truncation_length(truncation_length, state) + return [truncation_length, loader] + + +def handle_unload_model_click(): + unload_model() + return "Model unloaded" diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 307bc0f3..8d4aa056 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -7,6 +7,7 @@ from modules.text_generation import ( get_token_ids, stop_everything_event ) +from modules.ui_default import handle_delete_prompt, handle_save_prompt from modules.utils import gradio inputs = ('textbox-notebook', 'interface_state') @@ -66,14 +67,12 @@ def create_event_handlers(): lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( 
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) @@ -82,22 +81,12 @@ def create_event_handlers(): lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) - shared.gradio['save_prompt-notebook'].click( - lambda x: x, gradio('textbox-notebook'), gradio('save_contents')).then( - lambda: 'prompts/', None, gradio('save_root')).then( - lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['delete_prompt-notebook'].click( - lambda: 'prompts/', None, gradio('delete_root')).then( - lambda x: x + '.txt', gradio('prompt_menu-notebook'), gradio('delete_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) - + shared.gradio['save_prompt-notebook'].click(handle_save_prompt, gradio('textbox-notebook'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) + shared.gradio['delete_prompt-notebook'].click(handle_delete_prompt, gradio('prompt_menu-notebook'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) shared.gradio['textbox-notebook'].input(lambda x: f"{count_tokens(x)}", gradio('textbox-notebook'), gradio('token-counter-notebook'), show_progress=False) shared.gradio['get_logits-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 68512c7e..cd4288b3 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -102,10 +102,16 @@ def create_ui(default_preset): def create_event_handlers(): shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader', 'dynamic_temperature'), gradio(loaders.list_all_samplers()), show_progress=False) - shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params())) - shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params())) - shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string')) - shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent')) + shared.gradio['preset_menu'].change( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + 
presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False) + + shared.gradio['random_preset'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False) + + shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False) + shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False) def get_truncation_length(): diff --git a/modules/ui_session.py b/modules/ui_session.py index 087091ce..1ae36846 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -35,15 +35,22 @@ def create_ui(): None, None, None, js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click( - None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( - lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')) + lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( + None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') shared.gradio['save_settings'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - ui.save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents')).then( - lambda: './', None, gradio('save_root')).then( - lambda: 'settings.yaml', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) + handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) + + +def handle_save_settings(state, preset, extensions, show_controls, theme): + contents = ui.save_settings(state, preset, extensions, show_controls, theme) + return [ + contents, + "settings.yaml", + "./", + gr.update(visible=True) + ] def set_interface_arguments(extensions, bool_active): diff --git a/modules/utils.py b/modules/utils.py index 4b65736b..f4333031 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -95,11 +95,10 @@ def get_available_presets(): def get_available_prompts(): - prompts = [] - files = set((k.stem for k in Path('prompts').glob('*.txt'))) - prompts += sorted([k for k in files if re.match('^[0-9]', k)], key=natural_keys, reverse=True) - prompts += sorted([k for k in files if re.match('^[^0-9]', k)], key=natural_keys) - prompts += ['None'] + prompt_files = list(Path('prompts').glob('*.txt')) + sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True) + prompts = [file.stem for file in sorted_files] + prompts.append('None') return prompts diff --git a/server.py b/server.py index f057d233..2794caa1 100644 --- a/server.py +++ b/server.py @@ -149,8 +149,9 @@ def create_interface(): shared.gradio['interface'].load(None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}") shared.gradio['interface'].load(None, None, None, js=f"() => {{{js}}}") shared.gradio['interface'].load(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') - shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) - shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) + shared.gradio['interface'].load( + partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'), show_progress=False) extensions_module.create_extensions_tabs() # Extensions tabs extensions_module.create_extensions_block() # Extensions block From 564d8c8c0d98f7e960bc22d515c42a5d5774754c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 20 Jul 2024 20:02:54 -0700 Subject: [PATCH 08/91] Make 
alpha_value a float number --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index c868be96..e7c2ba7e 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -103,7 +103,7 @@ def create_ui(): shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len) with gr.Blocks(): - shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=0, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.') + shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.') shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.') shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=0, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.') From 9b205f94a4a0ee7c598bf5548d13d4f8486a2619 Mon Sep 17 00:00:00 2001 From: Patrick Leiser Date: Sat, 20 Jul 2024 20:05:28 -0700 Subject: [PATCH 09/91] Fix for issue #6024, don't auto-hide the chat contents (#6247) --- css/main.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/main.css b/css/main.css index 5768348e..04f79186 100644 --- a/css/main.css +++ b/css/main.css @@ -378,6 +378,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } } +.chat-parent .prose { + visibility: visible; +} + .old-ui .chat-parent { height: calc(100dvh - 192px - var(--header-height) - var(--input-delta)); margin-bottom: var(--input-delta) !important; From 916d1d8283054abc4d5f51de5531ff5d85ff155a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 20 Jul 2024 20:32:57 -0700 Subject: [PATCH 10/91] UI: improve the style of code blocks in light theme --- css/highlightjs/github.min.css | 10 ++++++++++ css/main.css | 5 +++++ modules/block_requests.py | 2 ++ modules/ui.py | 4 ++-- modules/ui_session.py | 2 +- 5 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 css/highlightjs/github.min.css diff --git a/css/highlightjs/github.min.css b/css/highlightjs/github.min.css new file mode 100644 index 00000000..96af2848 --- /dev/null +++ b/css/highlightjs/github.min.css @@ -0,0 +1,10 @@ +pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}/*! 
+ Theme: GitHub + Description: Light theme as seen on github.com + Author: github.com + Maintainer: @Hirse + Updated: 2021-05-15 + + Outdated base version: https://github.com/primer/github-syntax-light + Current colors taken from GitHub's CSS +*/.hljs{color:#24292e;background:#fff}.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-template-tag,.hljs-template-variable,.hljs-type,.hljs-variable.language_{color:#d73a49}.hljs-title,.hljs-title.class_,.hljs-title.class_.inherited__,.hljs-title.function_{color:#6f42c1}.hljs-attr,.hljs-attribute,.hljs-literal,.hljs-meta,.hljs-number,.hljs-operator,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-variable{color:#005cc5}.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#032f62}.hljs-built_in,.hljs-symbol{color:#e36209}.hljs-code,.hljs-comment,.hljs-formula{color:#6a737d}.hljs-name,.hljs-quote,.hljs-selector-pseudo,.hljs-selector-tag{color:#22863a}.hljs-subst{color:#24292e}.hljs-section{color:#005cc5;font-weight:700}.hljs-bullet{color:#735c0f}.hljs-emphasis{color:#24292e;font-style:italic}.hljs-strong{color:#24292e;font-weight:700}.hljs-addition{color:#22863a;background-color:#f0fff4}.hljs-deletion{color:#b31d28;background-color:#ffeef0} diff --git a/css/main.css b/css/main.css index 04f79186..1e339af3 100644 --- a/css/main.css +++ b/css/main.css @@ -451,6 +451,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { border-radius: 5px; font-size: 82%; padding: 1px 3px; + background: white !important; + color: #1f2328; +} + +.dark .message-body code { background: #0d1117 !important; color: rgb(201 209 217); } diff --git a/modules/block_requests.py b/modules/block_requests.py index 778b9f5a..886930f0 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -3,6 +3,7 @@ import io import requests +from modules import shared from modules.logging_colors import logger original_open = open @@ -54,6 +55,7 @@ def my_open(*args, **kwargs): '\n ' '\n ' '\n ' + f'\n ' '\n ' '\n ' ) diff --git a/modules/ui.py b/modules/ui.py index 48194540..25b1fdf7 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -15,8 +15,6 @@ with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f: css += f.read() with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f: css += f.read() -with open(Path(__file__).resolve().parent / '../css/highlightjs/github-dark.min.css', 'r') as f: - css += f.read() with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f: css += f.read() with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f: @@ -29,6 +27,8 @@ with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f: show_controls_js = f.read() with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f: update_big_picture_js = f.read() +with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f: + dark_theme_js = f.read() refresh_symbol = '🔄' delete_symbol = '🗑️' diff --git a/modules/ui_session.py b/modules/ui_session.py index 1ae36846..dfb95b83 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -36,7 +36,7 @@ def create_ui(): shared.gradio['toggle_dark_mode'].click( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( - None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') + None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}') shared.gradio['save_settings'].click( 
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( From e9d4bff7d0a01f45f20df50ba45e6c796e59ff66 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 20 Jul 2024 22:04:48 -0700 Subject: [PATCH 11/91] Update the --tensor_split description --- modules/shared.py | 2 +- modules/ui_model_menu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 20eaff58..975e56c2 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -127,7 +127,7 @@ group.add_argument('--n_batch', type=int, default=512, help='Maximum number of p group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.') group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.') group.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.') -group.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.') +group.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.') group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.') group.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.') group.add_argument('--no_offload_kqv', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index e7c2ba7e..cd245cf8 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -94,7 +94,7 @@ def create_ui(): shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.') shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.') - shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17') + shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. 
Example: 60,40') shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch) shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads) shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch) From 58a1581b96cbe285fcb6ac968a9b1d1e5ecb3680 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 09:47:28 -0700 Subject: [PATCH 12/91] Add missing dark_theme.js (oops) --- js/dark_theme.js | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 js/dark_theme.js diff --git a/js/dark_theme.js b/js/dark_theme.js new file mode 100644 index 00000000..b540fb11 --- /dev/null +++ b/js/dark_theme.js @@ -0,0 +1,9 @@ +function toggleDarkMode() { + document.body.classList.toggle("dark"); + var currentCSS = document.getElementById("highlight-css"); + if (currentCSS.getAttribute("href") === "file/css/highlightjs/github-dark.min.css") { + currentCSS.setAttribute("href", "file/css/highlightjs/github.min.css"); + } else { + currentCSS.setAttribute("href", "file/css/highlightjs/github-dark.min.css"); + } +} From d05846eae5dc1ed836ddd9767fd3f6ac62499e33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 10:17:22 -0700 Subject: [PATCH 13/91] UI: refresh the pfp cache on handle_your_picture_change --- modules/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index a1196daf..c95673ce 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1180,7 +1180,7 @@ def handle_delete_template_click(template): def handle_your_picture_change(picture, state): upload_your_profile_picture(picture) - html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], reset_cache=True) return html From 17df2d7bdf3c7fcfede001eed38baddf339ef0e5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 10:45:01 -0700 Subject: [PATCH 14/91] UI: don't export the instruction template on "Save UI defaults to settings.yaml" --- modules/ui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 25b1fdf7..b1e4edaf 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -237,7 +237,7 @@ def apply_interface_values(state, use_persistent=False): def save_settings(state, preset, extensions_list, show_controls, theme_state): output = copy.deepcopy(shared.settings) - exclude = ['name2', 'greeting', 'context', 'turn_template', 'truncation_length'] + exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str'] for k in state: if k in shared.settings and k not in exclude: output[k] = state[k] @@ -269,7 +269,7 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state): if key in shared.default_settings and output[key] == shared.default_settings[key]: output.pop(key) - return yaml.dump(output, sort_keys=False, width=float("inf")) + return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True) def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True): From 
af99e0697e3b983036d87ad9c0e3545e2624d4c5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 10:45:27 -0700 Subject: [PATCH 15/91] UI: increase the font weight of chat messages --- css/chat_style-TheEncrypted777.css | 1 + css/chat_style-cai-chat.css | 3 ++- css/chat_style-messenger.css | 1 + css/chat_style-wpp.css | 3 ++- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/css/chat_style-TheEncrypted777.css b/css/chat_style-TheEncrypted777.css index f823eef0..6404f41d 100644 --- a/css/chat_style-TheEncrypted777.css +++ b/css/chat_style-TheEncrypted777.css @@ -90,6 +90,7 @@ line-height: 1.428571429 !important; color: rgb(243 244 246) !important; text-shadow: 2px 2px 2px rgb(0 0 0); + font-weight: 500; } .message-body p em { diff --git a/css/chat_style-cai-chat.css b/css/chat_style-cai-chat.css index ba0c8f13..618184cf 100644 --- a/css/chat_style-cai-chat.css +++ b/css/chat_style-cai-chat.css @@ -46,6 +46,7 @@ .message-body p { font-size: 15px !important; line-height: 22.5px !important; + font-weight: 500; } .message-body p, .chat .message-body ul, .chat .message-body ol { @@ -59,4 +60,4 @@ .message-body p em { color: rgb(110 110 110) !important; font-weight: 500; -} \ No newline at end of file +} diff --git a/css/chat_style-messenger.css b/css/chat_style-messenger.css index 6bb97971..f0fd1578 100644 --- a/css/chat_style-messenger.css +++ b/css/chat_style-messenger.css @@ -88,6 +88,7 @@ margin-bottom: 0 !important; font-size: 15px !important; line-height: 1.428571429 !important; + font-weight: 500; } .dark .message-body p em { diff --git a/css/chat_style-wpp.css b/css/chat_style-wpp.css index ac4fd39a..30ca61f3 100644 --- a/css/chat_style-wpp.css +++ b/css/chat_style-wpp.css @@ -44,6 +44,7 @@ margin-bottom: 0 !important; font-size: 15px !important; line-height: 1.428571429 !important; + font-weight: 500; } .dark .message-body p em { @@ -52,4 +53,4 @@ .message-body p em { color: rgb(110 110 110) !important; -} \ No newline at end of file +} From 423372d6e740128ddb80ff9a0865649cb97d0237 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 13:23:18 -0700 Subject: [PATCH 16/91] Organize ui_file_saving.py --- modules/ui_file_saving.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index c1047e70..4b5ac7c6 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -47,24 +47,26 @@ def create_ui(): def create_event_handlers(): - shared.gradio['save_confirm'].click(handle_save_confirm_click, gradio('save_root', 'save_filename', 'save_contents'), gradio('file_saver'), show_progress=False) - shared.gradio['delete_confirm'].click(handle_delete_confirm_click, gradio('delete_root', 'delete_filename'), gradio('file_deleter'), show_progress=False) - shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter')) - shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver')) - shared.gradio['save_character_confirm'].click(handle_save_character_confirm_click, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), gradio('character_menu', 'character_saver'), show_progress=False) - shared.gradio['delete_character_confirm'].click(handle_delete_character_confirm_click, gradio('character_menu'), gradio('character_menu', 'character_deleter'), show_progress=False) - 
shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'), show_progress=False) - shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False) shared.gradio['save_preset'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( handle_save_preset_click, gradio('interface_state'), gradio('save_preset_contents', 'save_preset_filename', 'preset_saver'), show_progress=False) - shared.gradio['save_preset_confirm'].click(handle_save_preset_confirm_click, gradio('save_preset_filename', 'save_preset_contents'), gradio('preset_menu', 'preset_saver'), show_progress=False) - shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'), show_progress=False) shared.gradio['delete_preset'].click(handle_delete_preset_click, gradio('preset_menu'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) shared.gradio['save_grammar'].click(handle_save_grammar_click, gradio('grammar_string'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) shared.gradio['delete_grammar'].click(handle_delete_grammar_click, gradio('grammar_file'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) + shared.gradio['save_preset_confirm'].click(handle_save_preset_confirm_click, gradio('save_preset_filename', 'save_preset_contents'), gradio('preset_menu', 'preset_saver'), show_progress=False) + shared.gradio['save_confirm'].click(handle_save_confirm_click, gradio('save_root', 'save_filename', 'save_contents'), gradio('file_saver'), show_progress=False) + shared.gradio['delete_confirm'].click(handle_delete_confirm_click, gradio('delete_root', 'delete_filename'), gradio('file_deleter'), show_progress=False) + shared.gradio['save_character_confirm'].click(handle_save_character_confirm_click, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), gradio('character_menu', 'character_saver'), show_progress=False) + shared.gradio['delete_character_confirm'].click(handle_delete_character_confirm_click, gradio('character_menu'), gradio('character_menu', 'character_deleter'), show_progress=False) + + shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'), show_progress=False) + shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver')) + shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter')) + shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'), show_progress=False) + shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False) + def handle_save_confirm_click(root, filename, contents): utils.save_file(root + filename, contents) From 7ef241435719df1555cf5bdb9d70a6dbddad7b71 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 15:38:20 -0700 Subject: [PATCH 17/91] UI: Make the file saving dialogs more robust --- modules/ui_file_saving.py | 64 ++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 4b5ac7c6..b696b655 100644 --- a/modules/ui_file_saving.py +++ 
b/modules/ui_file_saving.py @@ -2,6 +2,7 @@ import gradio as gr from modules import chat, presets, shared, ui, utils from modules.utils import gradio +import traceback def create_ui(): @@ -68,31 +69,67 @@ def create_event_handlers(): shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False) +def handle_save_preset_confirm_click(filename, contents): + try: + utils.save_file(f"presets/{filename}.yaml", contents) + available_presets = utils.get_available_presets() + output = gr.update(choices=available_presets, value=filename), + except Exception: + output = gr.update() + traceback.print_exc() + + return [ + output, + gr.update(visible=False) + ] + + def handle_save_confirm_click(root, filename, contents): - utils.save_file(root + filename, contents) + try: + utils.save_file(root + filename, contents) + except Exception: + traceback.print_exc() + return gr.update(visible=False) def handle_delete_confirm_click(root, filename): - utils.delete_file(root + filename) + try: + utils.delete_file(root + filename) + except Exception: + traceback.print_exc() + return gr.update(visible=False) def handle_save_character_confirm_click(name2, greeting, context, character_picture, filename): - chat.save_character(name2, greeting, context, character_picture, filename) - available_characters = utils.get_available_characters() + try: + chat.save_character(name2, greeting, context, character_picture, filename) + available_characters = utils.get_available_characters() + output = gr.update(choices=available_characters, value=filename), + except Exception: + output = gr.update() + traceback.print_exc() return [ - gr.update(choices=available_characters, value=filename), + output, gr.update(visible=False) ] def handle_delete_character_confirm_click(character): - index = str(utils.get_available_characters().index(character)) - chat.delete_character(character) - output = chat.update_character_menu_after_deletion(index) - return [output, gr.update(visible=False)] + try: + index = str(utils.get_available_characters().index(character)) + chat.delete_character(character) + output = chat.update_character_menu_after_deletion(index) + except Exception: + output = gr.update() + traceback.print_exc() + + return [ + output, + gr.update(visible=False) + ] def handle_save_preset_click(state): @@ -104,15 +141,6 @@ def handle_save_preset_click(state): ] -def handle_save_preset_confirm_click(filename, contents): - utils.save_file(f"presets/{filename}.yaml", contents) - available_presets = utils.get_available_presets() - return [ - gr.update(choices=available_presets, value=filename), - gr.update(visible=False) - ] - - def handle_delete_preset_click(preset): return [ f"{preset}.yaml", From e1085180cf49b7c11d8138ff7e23e4712f97a5f4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 21:20:22 -0700 Subject: [PATCH 18/91] UI: better handle scrolling when the input area grows --- js/main.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/js/main.js b/js/main.js index 6b456517..bdbb7cef 100644 --- a/js/main.js +++ b/js/main.js @@ -445,14 +445,12 @@ function updateCssProperties() { // Check if the chat container is visible if (chatContainer.clientHeight > 0) { - - // Calculate new chat height and adjust CSS properties var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100; if (document.getElementById("chat-tab").style.paddingBottom != "") { 
numericHeight += 20; } - const newChatHeight = `${numericHeight}px`; + const newChatHeight = `${numericHeight}px`; document.documentElement.style.setProperty("--chat-height", newChatHeight); document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`); @@ -463,15 +461,14 @@ function updateCssProperties() { // Adjust scrollTop based on input height change if (chatInputHeight !== currentChatInputHeight) { - chatContainer.scrollTop += chatInputHeight > currentChatInputHeight ? chatInputHeight : -chatInputHeight + 40; + chatContainer.scrollTop += chatInputHeight - currentChatInputHeight; currentChatInputHeight = chatInputHeight; } } } // Observe textarea size changes and call update function -new ResizeObserver(updateCssProperties) - .observe(document.querySelector("#chat-input textarea")); +new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-input textarea")); // Handle changes in window size window.addEventListener("resize", updateCssProperties); From 79e8dbe45f51135c54ad4d8427a9eca9d9544c3f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 22:06:49 -0700 Subject: [PATCH 19/91] UI: minor optimization --- modules/ui.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index b1e4edaf..0b56c20b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -116,6 +116,7 @@ def list_model_elements(): 'hqq_backend', 'cpp_runner', ] + if is_torch_xpu_available(): for i in range(torch.xpu.device_count()): elements.append(f'gpu_memory_{i}') @@ -214,9 +215,11 @@ def list_interface_input_elements(): def gather_interface_values(*args): + interface_elements = list_interface_input_elements() + output = {} - for i, element in enumerate(list_interface_input_elements()): - output[element] = args[i] + for element, value in zip(interface_elements, args): + output[element] = value if not shared.args.multi_user: shared.persistent_interface_state = output From 8768b69a2d6bf37278593584184f05de6ea0bb19 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 21 Jul 2024 22:08:14 -0700 Subject: [PATCH 20/91] Lint --- modules/ui_file_saving.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index b696b655..51fac69f 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -1,8 +1,9 @@ +import traceback + import gradio as gr from modules import chat, presets, shared, ui, utils from modules.utils import gradio -import traceback def create_ui(): From f2d802e70744c1d219eac82d97ac61a3e2f51813 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:05:40 -0700 Subject: [PATCH 21/91] UI: make Default/Notebook contents persist on page reload --- modules/ui.py | 6 ++++++ modules/ui_default.py | 5 ++++- modules/ui_notebook.py | 10 ++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 0b56c20b..cfe709fa 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -230,8 +230,14 @@ def gather_interface_values(*args): def apply_interface_values(state, use_persistent=False): if use_persistent: state = shared.persistent_interface_state + if 'textbox-default' in state: + state.pop('prompt_menu-default') + + if 'textbox-notebook' in state: + state.pop('prompt_menu-notebook') elements = list_interface_input_elements() + if len(state) == 0: return 
[gr.update() for k in elements] # Dummy, do nothing else: diff --git a/modules/ui_default.py b/modules/ui_default.py index 676b7fa5..112acd23 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -64,20 +64,23 @@ def create_event_handlers(): shared.gradio['Generate-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( + lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( + lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') - shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( + lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) + shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) shared.gradio['save_prompt-default'].click(handle_save_prompt, gradio('textbox-default'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) shared.gradio['delete_prompt-default'].click(handle_delete_prompt, gradio('prompt_menu-default'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 8d4aa056..79932844 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -67,22 +67,28 @@ def create_event_handlers(): lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( + lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( + lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then( None, None, None, 
js=f'() => {{{ui.audio_notification_js}}}') - shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) - shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) shared.gradio['Regenerate-notebook'].click( lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( + lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + shared.gradio['Undo'].click( + lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( + lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None) + + shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['save_prompt-notebook'].click(handle_save_prompt, gradio('textbox-notebook'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) From 7d2449f8b09a1456a7e1cca3543c05802988629b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:49:20 -0700 Subject: [PATCH 22/91] Bump llama-cpp-python to 0.2.82.3 (unofficial build) --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2cd4328b..8fc0396f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version 
== "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 0d023242..cc19ccbf 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 85e40814..b5319ad2 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a2381124..6d84a0aa 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index ef1de35a..5d8237a6 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index ada3242c..64691cf1 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 8302ce06..d6d6868b 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index fb16ce81..81e20b7c 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 017d2332ea7af16ff6d8b95e0a1f5fd3e74b80c9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:50:36 -0700 Subject: [PATCH 23/91] Remove no longer necessary llama-cpp-python patch --- modules/llama_cpp_python_hijack.py | 46 ------------------------------ 1 file changed, 46 deletions(-) diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py index 64280dc9..320404ff 100644 --- a/modules/llama_cpp_python_hijack.py +++ b/modules/llama_cpp_python_hijack.py @@ -1,8 +1,5 @@ import importlib import platform -from typing import Sequence - -from tqdm import tqdm from modules import shared from modules.cache_utils import process_llamacpp_cache @@ -49,48 +46,6 @@ def llama_cpp_lib(): return None -def eval_with_progress(self, tokens: Sequence[int]): - """ - A copy of - - https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py - - with tqdm to show prompt processing progress. 
- """ - assert self._ctx.ctx is not None - assert self._batch.batch is not None - self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1) - - if len(tokens) > 1: - progress_bar = tqdm(range(0, len(tokens), self.n_batch), desc="Prompt evaluation", leave=False) - else: - progress_bar = range(0, len(tokens), self.n_batch) - - for i in progress_bar: - batch = tokens[i : min(len(tokens), i + self.n_batch)] - n_past = self.n_tokens - n_tokens = len(batch) - self._batch.set_batch( - batch=batch, n_past=n_past, logits_all=self.context_params.logits_all - ) - self._ctx.decode(self._batch) - # Save tokens - self.input_ids[n_past : n_past + n_tokens] = batch - # Save logits - if self.context_params.logits_all: - rows = n_tokens - cols = self._n_vocab - logits = self._ctx.get_logits()[: rows * cols] - self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits - else: - rows = 1 - cols = self._n_vocab - logits = self._ctx.get_logits()[: rows * cols] - self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits - # Update n_tokens - self.n_tokens += n_tokens - - def monkey_patch_llama_cpp_python(lib): if getattr(lib.Llama, '_is_patched', False): # If the patch is already applied, do nothing @@ -107,7 +62,6 @@ def monkey_patch_llama_cpp_python(lib): for output in self.original_generate(*args, **kwargs): yield output - lib.Llama.eval = eval_with_progress lib.Llama.original_generate = lib.Llama.generate lib.Llama.generate = my_generate From a687f950ba33db0b937a5c3c91df18de217b520b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:52:40 -0700 Subject: [PATCH 24/91] Remove the tensorcores llama.cpp wheels They are not faster than the default wheels anymore and they use a lot of space. --- modules/llama_cpp_python_hijack.py | 2 -- modules/loaders.py | 2 -- modules/shared.py | 2 +- modules/ui.py | 1 - modules/ui_model_menu.py | 1 - requirements.txt | 6 ------ requirements_noavx2.txt | 6 ------ 7 files changed, 1 insertion(+), 19 deletions(-) diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py index 320404ff..5d73befb 100644 --- a/modules/llama_cpp_python_hijack.py +++ b/modules/llama_cpp_python_hijack.py @@ -4,7 +4,6 @@ import platform from modules import shared from modules.cache_utils import process_llamacpp_cache - imported_module = None @@ -22,7 +21,6 @@ def llama_cpp_lib(): else: lib_names = [ ('cpu', 'llama_cpp'), - ('tensorcores', 'llama_cpp_cuda_tensorcores'), (None, 'llama_cpp_cuda'), (None, 'llama_cpp') ] diff --git a/modules/loaders.py b/modules/loaders.py index 75ed897b..b5d8777c 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -46,7 +46,6 @@ loaders_and_params = OrderedDict({ 'numa', 'no_offload_kqv', 'row_split', - 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', @@ -73,7 +72,6 @@ loaders_and_params = OrderedDict({ 'logits_all', 'no_offload_kqv', 'row_split', - 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/shared.py b/modules/shared.py index 975e56c2..9dcd848a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -118,7 +118,6 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for # llama.cpp group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') -group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. 
NVIDIA only.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') @@ -217,6 +216,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') +group.add_argument('--tensorcores', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) diff --git a/modules/ui.py b/modules/ui.py index cfe709fa..f4414597 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -109,7 +109,6 @@ def list_model_elements(): 'logits_all', 'no_offload_kqv', 'row_split', - 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index cd245cf8..5cab4078 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -117,7 +117,6 @@ def create_ui(): shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.') shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) - shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. 
This increases performance on RTX cards.') shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') diff --git a/requirements.txt b/requirements.txt index 8fc0396f..1052147f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,12 +46,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -# llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" - # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 81e20b7c..93aae60b 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -46,12 +46,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -# llama-cpp-python (CUDA, tensor cores) 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" - # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" From 0f53a736c19d288f2c9ca590b27f09bfffe2537c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:02:25 -0700 Subject: [PATCH 25/91] Revert the llama-cpp-python update --- modules/llama_cpp_python_hijack.py | 48 ++++++++++++++++++++++++++++++ modules/loaders.py | 2 ++ modules/shared.py | 2 +- modules/ui.py | 1 + modules/ui_model_menu.py | 1 + requirements.txt | 22 +++++++++----- requirements_amd.txt | 12 ++++---- requirements_amd_noavx2.txt | 8 ++--- requirements_apple_intel.txt | 8 ++--- requirements_apple_silicon.txt | 12 ++++---- requirements_cpu_only.txt | 8 ++--- requirements_cpu_only_noavx2.txt | 8 ++--- requirements_noavx2.txt | 22 +++++++++----- 13 files changed, 109 insertions(+), 45 deletions(-) diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py index 5d73befb..64280dc9 100644 --- a/modules/llama_cpp_python_hijack.py +++ b/modules/llama_cpp_python_hijack.py @@ -1,9 +1,13 @@ import importlib import platform +from typing import Sequence + +from tqdm import tqdm from modules import shared from modules.cache_utils import process_llamacpp_cache + imported_module = None @@ -21,6 +25,7 @@ def llama_cpp_lib(): else: lib_names = [ ('cpu', 'llama_cpp'), + ('tensorcores', 'llama_cpp_cuda_tensorcores'), (None, 'llama_cpp_cuda'), (None, 'llama_cpp') ] @@ -44,6 +49,48 @@ def llama_cpp_lib(): return None +def eval_with_progress(self, tokens: Sequence[int]): + """ + A copy of + + https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py + + with tqdm to show prompt processing progress. 
+ """ + assert self._ctx.ctx is not None + assert self._batch.batch is not None + self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1) + + if len(tokens) > 1: + progress_bar = tqdm(range(0, len(tokens), self.n_batch), desc="Prompt evaluation", leave=False) + else: + progress_bar = range(0, len(tokens), self.n_batch) + + for i in progress_bar: + batch = tokens[i : min(len(tokens), i + self.n_batch)] + n_past = self.n_tokens + n_tokens = len(batch) + self._batch.set_batch( + batch=batch, n_past=n_past, logits_all=self.context_params.logits_all + ) + self._ctx.decode(self._batch) + # Save tokens + self.input_ids[n_past : n_past + n_tokens] = batch + # Save logits + if self.context_params.logits_all: + rows = n_tokens + cols = self._n_vocab + logits = self._ctx.get_logits()[: rows * cols] + self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits + else: + rows = 1 + cols = self._n_vocab + logits = self._ctx.get_logits()[: rows * cols] + self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits + # Update n_tokens + self.n_tokens += n_tokens + + def monkey_patch_llama_cpp_python(lib): if getattr(lib.Llama, '_is_patched', False): # If the patch is already applied, do nothing @@ -60,6 +107,7 @@ def monkey_patch_llama_cpp_python(lib): for output in self.original_generate(*args, **kwargs): yield output + lib.Llama.eval = eval_with_progress lib.Llama.original_generate = lib.Llama.generate lib.Llama.generate = my_generate diff --git a/modules/loaders.py b/modules/loaders.py index b5d8777c..75ed897b 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -46,6 +46,7 @@ loaders_and_params = OrderedDict({ 'numa', 'no_offload_kqv', 'row_split', + 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', @@ -72,6 +73,7 @@ loaders_and_params = OrderedDict({ 'logits_all', 'no_offload_kqv', 'row_split', + 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/shared.py b/modules/shared.py index 9dcd848a..975e56c2 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -118,6 +118,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for # llama.cpp group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') +group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. 
NVIDIA only.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') @@ -216,7 +217,6 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') -group.add_argument('--tensorcores', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) diff --git a/modules/ui.py b/modules/ui.py index f4414597..cfe709fa 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -109,6 +109,7 @@ def list_model_elements(): 'logits_all', 'no_offload_kqv', 'row_split', + 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 5cab4078..cd245cf8 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -117,6 +117,7 @@ def create_ui(): shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.') shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) + shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. 
This increases performance on RTX cards.') shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') diff --git a/requirements.txt b/requirements.txt index 1052147f..2cd4328b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,16 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" + +# llama-cpp-python (CUDA, tensor cores) +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index cc19ccbf..0d023242 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index b5319ad2..85e40814 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 6d84a0aa..a2381124 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 5d8237a6..ef1de35a 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ 
-32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 64691cf1..ada3242c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ 
sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index d6d6868b..8302ce06 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 93aae60b..fb16ce81 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,16 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" + +# llama-cpp-python (CUDA, tensor cores) +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 11bbf71aa59315444244356017ea02321c3b81de Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:19:41 -0300 Subject: [PATCH 26/91] Bump back llama-cpp-python (#6257) --- modules/llama_cpp_python_hijack.py | 48 ------------------------------ modules/loaders.py | 2 -- modules/shared.py | 2 +- modules/ui.py | 1 - modules/ui_model_menu.py | 1 - requirements.txt | 22 +++++--------- requirements_amd.txt | 12 ++++---- requirements_amd_noavx2.txt | 8 ++--- requirements_apple_intel.txt | 8 ++--- requirements_apple_silicon.txt | 12 ++++---- requirements_cpu_only.txt | 8 ++--- requirements_cpu_only_noavx2.txt | 8 ++--- requirements_noavx2.txt | 22 +++++--------- 13 files changed, 45 insertions(+), 109 deletions(-) diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py index 64280dc9..5d73befb 100644 --- a/modules/llama_cpp_python_hijack.py +++ b/modules/llama_cpp_python_hijack.py @@ -1,13 +1,9 @@ import importlib import platform -from typing import Sequence - -from tqdm import tqdm from modules import shared from modules.cache_utils import process_llamacpp_cache - imported_module = None @@ -25,7 +21,6 @@ def llama_cpp_lib(): else: lib_names = [ ('cpu', 'llama_cpp'), - ('tensorcores', 'llama_cpp_cuda_tensorcores'), (None, 'llama_cpp_cuda'), (None, 'llama_cpp') ] @@ -49,48 +44,6 @@ def llama_cpp_lib(): return None -def eval_with_progress(self, tokens: Sequence[int]): - """ - A copy of - - https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py - - with tqdm to show prompt 
processing progress. - """ - assert self._ctx.ctx is not None - assert self._batch.batch is not None - self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1) - - if len(tokens) > 1: - progress_bar = tqdm(range(0, len(tokens), self.n_batch), desc="Prompt evaluation", leave=False) - else: - progress_bar = range(0, len(tokens), self.n_batch) - - for i in progress_bar: - batch = tokens[i : min(len(tokens), i + self.n_batch)] - n_past = self.n_tokens - n_tokens = len(batch) - self._batch.set_batch( - batch=batch, n_past=n_past, logits_all=self.context_params.logits_all - ) - self._ctx.decode(self._batch) - # Save tokens - self.input_ids[n_past : n_past + n_tokens] = batch - # Save logits - if self.context_params.logits_all: - rows = n_tokens - cols = self._n_vocab - logits = self._ctx.get_logits()[: rows * cols] - self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits - else: - rows = 1 - cols = self._n_vocab - logits = self._ctx.get_logits()[: rows * cols] - self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits - # Update n_tokens - self.n_tokens += n_tokens - - def monkey_patch_llama_cpp_python(lib): if getattr(lib.Llama, '_is_patched', False): # If the patch is already applied, do nothing @@ -107,7 +60,6 @@ def monkey_patch_llama_cpp_python(lib): for output in self.original_generate(*args, **kwargs): yield output - lib.Llama.eval = eval_with_progress lib.Llama.original_generate = lib.Llama.generate lib.Llama.generate = my_generate diff --git a/modules/loaders.py b/modules/loaders.py index 75ed897b..b5d8777c 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -46,7 +46,6 @@ loaders_and_params = OrderedDict({ 'numa', 'no_offload_kqv', 'row_split', - 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', @@ -73,7 +72,6 @@ loaders_and_params = OrderedDict({ 'logits_all', 'no_offload_kqv', 'row_split', - 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/shared.py b/modules/shared.py index 975e56c2..9dcd848a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -118,7 +118,6 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for # llama.cpp group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') -group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. 
NVIDIA only.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') @@ -217,6 +216,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') +group.add_argument('--tensorcores', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) diff --git a/modules/ui.py b/modules/ui.py index cfe709fa..f4414597 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -109,7 +109,6 @@ def list_model_elements(): 'logits_all', 'no_offload_kqv', 'row_split', - 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index cd245cf8..5cab4078 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -117,7 +117,6 @@ def create_ui(): shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.') shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) - shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. 
This increases performance on RTX cards.') shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') diff --git a/requirements.txt b/requirements.txt index 2cd4328b..1052147f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,16 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" - -# llama-cpp-python (CUDA, tensor cores) 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 0d023242..cc19ccbf 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 85e40814..b5319ad2 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a2381124..6d84a0aa 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index ef1de35a..5d8237a6 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt 
@@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index ada3242c..64691cf1 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ 
sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 8302ce06..d6d6868b 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index fb16ce81..93aae60b 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,16 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" - -# llama-cpp-python (CUDA, tensor cores) 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From aa809e420eacedbe6caeaf641af621f32d99dc6e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:05:11 -0700 Subject: [PATCH 27/91] Bump llama-cpp-python to 0.2.83, add back tensorcore wheels Also add back the progress bar patch --- modules/llama_cpp_python_hijack.py | 48 ++++++++++++++++++++++++++++++ modules/loaders.py | 2 ++ modules/shared.py | 2 +- modules/ui.py | 1 + modules/ui_model_menu.py | 1 + requirements.txt | 22 +++++++++----- requirements_amd.txt | 12 ++++---- requirements_amd_noavx2.txt | 8 ++--- requirements_apple_intel.txt | 8 ++--- requirements_apple_silicon.txt | 12 ++++---- requirements_cpu_only.txt | 8 ++--- requirements_cpu_only_noavx2.txt | 8 ++--- requirements_noavx2.txt | 22 +++++++++----- 13 files changed, 109 insertions(+), 45 deletions(-) diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py index 5d73befb..64280dc9 100644 --- a/modules/llama_cpp_python_hijack.py +++ b/modules/llama_cpp_python_hijack.py @@ -1,9 +1,13 @@ import importlib import platform +from typing import Sequence + +from tqdm import tqdm from modules import shared from modules.cache_utils import process_llamacpp_cache + imported_module = None @@ -21,6 +25,7 @@ def llama_cpp_lib(): else: lib_names = [ ('cpu', 'llama_cpp'), + ('tensorcores', 'llama_cpp_cuda_tensorcores'), (None, 'llama_cpp_cuda'), (None, 'llama_cpp') ] @@ -44,6 
+49,48 @@ def llama_cpp_lib():
     return None
 
 
+def eval_with_progress(self, tokens: Sequence[int]):
+    """
+    A copy of
+
+    https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py
+
+    with tqdm to show prompt processing progress.
+    """
+    assert self._ctx.ctx is not None
+    assert self._batch.batch is not None
+    self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
+
+    if len(tokens) > 1:
+        progress_bar = tqdm(range(0, len(tokens), self.n_batch), desc="Prompt evaluation", leave=False)
+    else:
+        progress_bar = range(0, len(tokens), self.n_batch)
+
+    for i in progress_bar:
+        batch = tokens[i : min(len(tokens), i + self.n_batch)]
+        n_past = self.n_tokens
+        n_tokens = len(batch)
+        self._batch.set_batch(
+            batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
+        )
+        self._ctx.decode(self._batch)
+        # Save tokens
+        self.input_ids[n_past : n_past + n_tokens] = batch
+        # Save logits
+        if self.context_params.logits_all:
+            rows = n_tokens
+            cols = self._n_vocab
+            logits = self._ctx.get_logits()[: rows * cols]
+            self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits
+        else:
+            rows = 1
+            cols = self._n_vocab
+            logits = self._ctx.get_logits()[: rows * cols]
+            self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits
+        # Update n_tokens
+        self.n_tokens += n_tokens
+
+
 def monkey_patch_llama_cpp_python(lib):
     if getattr(lib.Llama, '_is_patched', False):
         # If the patch is already applied, do nothing
@@ -60,6 +107,7 @@ def monkey_patch_llama_cpp_python(lib):
         for output in self.original_generate(*args, **kwargs):
             yield output
 
+    lib.Llama.eval = eval_with_progress
     lib.Llama.original_generate = lib.Llama.generate
     lib.Llama.generate = my_generate
 
diff --git a/modules/loaders.py b/modules/loaders.py
index b5d8777c..75ed897b 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -46,6 +46,7 @@ loaders_and_params = OrderedDict({
         'numa',
         'no_offload_kqv',
         'row_split',
+        'tensorcores',
         'flash_attn',
         'streaming_llm',
         'attention_sink_size',
@@ -72,6 +73,7 @@ loaders_and_params = OrderedDict({
         'logits_all',
         'no_offload_kqv',
         'row_split',
+        'tensorcores',
         'flash_attn',
         'streaming_llm',
         'attention_sink_size',
diff --git a/modules/shared.py b/modules/shared.py
index 9dcd848a..975e56c2 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -118,6 +118,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
 # llama.cpp
 group = parser.add_argument_group('llama.cpp')
 group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
+group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. 
NVIDIA only.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') @@ -216,7 +217,6 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') -group.add_argument('--tensorcores', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) diff --git a/modules/ui.py b/modules/ui.py index f4414597..cfe709fa 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -109,6 +109,7 @@ def list_model_elements(): 'logits_all', 'no_offload_kqv', 'row_split', + 'tensorcores', 'flash_attn', 'streaming_llm', 'attention_sink_size', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 5cab4078..cd245cf8 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -117,6 +117,7 @@ def create_ui(): shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.') shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) + shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. 
This increases performance on RTX cards.') shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') diff --git a/requirements.txt b/requirements.txt index 1052147f..340443a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,16 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" + +# llama-cpp-python (CUDA, tensor cores) +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index cc19ccbf..423fac0e 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82.3+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index b5319ad2..e771bf8a 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 6d84a0aa..f19b9e74 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 5d8237a6..429e65a6 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ 
-32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82.3-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 64691cf1..28e5b039 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ 
sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index d6d6868b..09bd8def 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 93aae60b..df96bad6 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,16 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82.3+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82.3+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" + +# llama-cpp-python (CUDA, tensor cores) +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From f18c947a86c99751adc83fdcb9bb1114578f071a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:06:41 -0700 Subject: [PATCH 28/91] Update the tensorcores description --- modules/shared.py | 2 +- modules/ui_model_menu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 975e56c2..dec427dd 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -118,7 +118,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for # llama.cpp group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') -group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only.') +group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. 
This may increase performance on newer cards.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index cd245cf8..54ac9b12 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -117,7 +117,7 @@ def create_ui(): shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.') shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.') shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) - shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.') + shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.') shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.') shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.') shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') From 7e73058943161e3cd6b1ce7eb87f7975c9ac9598 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:18:02 -0700 Subject: [PATCH 29/91] UI: fix h1/h2/h3/h4 color in light mode --- css/main.css | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/css/main.css b/css/main.css index 1e339af3..d8e12e59 100644 --- a/css/main.css +++ b/css/main.css @@ -62,10 +62,6 @@ ol li p, ul li p { border: 0; } -.gradio-container-3-18-0 .prose * h1, h2, h3, h4 { - color: white; -} - .gradio-container { max-width: 100% !important; padding-top: 0 !important; @@ -403,6 +399,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding-bottom: 15px !important; } +.message-body h1, +.message-body h2, +.message-body h3, +.message-body h4 { + color: var(--body-text-color); +} + .message-body li { list-style-position: outside; } @@ -805,4 +808,3 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { max-height: 300px; } } - From 5c5e7264ec49a19ce9843a8718db59f06d5e2db9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:19:08 -0700 Subject: [PATCH 30/91] Update README --- README.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 1b8de70a..40ae94d5 100644 --- a/README.md +++ b/README.md @@ -204,16 +204,16 @@ List of command-line flags usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS] [--extensions EXTENSIONS [EXTENSIONS ...]] 
[--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] - [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] - [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] - [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE] - [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] - [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention] - [--hqq-backend HQQ_BACKEND] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] - [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] - [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] - [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] - [--checkpoint CHECKPOINT] [--monkey-patch] + [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] + [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] + [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] + [--attention-sink-size ATTENTION_SINK_SIZE] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] + [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] + [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] + [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] + [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] + [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] + [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] 
[--checkpoint CHECKPOINT] [--monkey-patch] Text generation web UI @@ -254,6 +254,7 @@ Transformers/Accelerate: --force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution. --no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast. --use_flash_attention_2 Set use_flash_attention_2=True while loading the model. + --use_eager_attention Set attn_implementation= eager while loading the model. bitsandbytes 4-bit: --load-in-4bit Load the model with 4-bit precision (using bitsandbytes). @@ -263,7 +264,7 @@ bitsandbytes 4-bit: llama.cpp: --flash-attn Use flash-attention. - --tensorcores Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only. + --tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards. --n_ctx N_CTX Size of the prompt context. --threads THREADS Number of threads to use. --threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing. @@ -272,7 +273,7 @@ llama.cpp: --no-mmap Prevent mmap from being used. --mlock Force the system to keep the model in RAM. --n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU. - --tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17. + --tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40. --numa Activate NUMA task allocation for llama.cpp. --logits_all Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower. --no_offload_kqv Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance. @@ -287,6 +288,8 @@ ExLlamaV2: --max_seq_len MAX_SEQ_LEN Maximum sequence length. --cfg-cache ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader. --no_flash_attn Force flash-attention to not be used. + --no_xformers Force xformers to not be used. + --no_sdpa Force Torch SDPA to not be used. --cache_8bit Use 8-bit cache to save VRAM. --cache_4bit Use Q4 cache to save VRAM. --num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral. @@ -307,6 +310,9 @@ AutoAWQ: HQQ: --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. +TensorRT-LLM: + --cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet. + DeepSpeed: --deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. --nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading. @@ -327,6 +333,7 @@ Gradio: --gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above. --ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file. --ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file. + --subpath SUBPATH Customize the subpath for gradio, use with reverse proxy API: --api Enable the API extension. @@ -392,18 +399,11 @@ Run `python download-model.py --help` to see all the options. 
https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/main/Colab-TextGen-GPU.ipynb -## Acknowledgment - -In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. - -## Links - -#### Community +## Community * Subreddit: https://www.reddit.com/r/oobabooga/ * Discord: https://discord.gg/jwZCF2dPQN -#### Support +## Acknowledgment -* ko-fi: https://ko-fi.com/oobabooga -* GitHub Sponsors: https://github.com/sponsors/oobabooga +In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. From 5e7f4ee88a3c8209ce98931462a730c5fd729480 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 19:11:55 -0700 Subject: [PATCH 31/91] UI: simplify the interface load events --- server.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/server.py b/server.py index 2794caa1..57e26be8 100644 --- a/server.py +++ b/server.py @@ -146,12 +146,21 @@ def create_interface(): ui_model_menu.create_event_handlers() # Interface launch events - shared.gradio['interface'].load(None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}") - shared.gradio['interface'].load(None, None, None, js=f"() => {{{js}}}") - shared.gradio['interface'].load(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') shared.gradio['interface'].load( - partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False).then( - chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'), show_progress=False) + None, + gradio('show_controls'), + None, + js=f"""(x) => {{ + if ({str(shared.settings['dark_theme']).lower()}) {{ + document.getElementsByTagName('body')[0].classList.add('dark'); + }} + {js} + {ui.show_controls_js} + toggle_controls(x); + }}""" + ) + + shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) extensions_module.create_extensions_tabs() # Extensions tabs extensions_module.create_extensions_block() # Extensions block From 3ee682208ca4d9aaedc5b237ad4a87b90f39114b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 19:53:56 -0700 Subject: [PATCH 32/91] Revert "Bump hqq from 0.1.7.post3 to 0.1.8 (#6238)" This reverts commit 1c3671699c83424fb48adb7929d007f6e9056eaa. 
--- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 340443a4..0c3f4690 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_amd.txt b/requirements_amd.txt index 423fac0e..7c9f4cda 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e771bf8a..cfe3a8e0 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index f19b9e74..a020387f 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 429e65a6..9f59a487 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 28e5b039..6110eab6 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 09bd8def..d4591919 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index df96bad6..8a486ef4 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -6,7 +6,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 7aa3971a..14e3aa88 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.26.* -hqq==0.1.8 +hqq==0.1.7.post3 jinja2==3.1.4 lm_eval==0.3.0 markdown From 95b3e98c360e9e334cc75f8109e679d9cd331d1e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jul 2024 23:06:06 -0700 Subject: [PATCH 33/91] UI: Fix code syntax highlighting --- css/highlightjs/github-dark.min.css | 105 +++++++++++++++++++++++++++- css/highlightjs/github.min.css | 105 +++++++++++++++++++++++++++- 2 files changed, 206 insertions(+), 4 deletions(-) diff --git a/css/highlightjs/github-dark.min.css b/css/highlightjs/github-dark.min.css index 03b6da8b..469b94a3 100644 --- a/css/highlightjs/github-dark.min.css +++ 
b/css/highlightjs/github-dark.min.css @@ -1,4 +1,14 @@ -pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}/*! +html body gradio-app .gradio-container pre code.hljs { + display: block; + overflow-x: auto; + padding: 1em +} + +html body gradio-app .gradio-container code.hljs { + padding: 3px 5px +} + +/*! Theme: GitHub Dark Description: Dark theme as seen on github.com Author: github.com @@ -7,4 +17,95 @@ pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5p Outdated base version: https://github.com/primer/github-syntax-dark Current colors taken from GitHub's CSS -*/.hljs{color:#c9d1d9;background:#0d1117}.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-template-tag,.hljs-template-variable,.hljs-type,.hljs-variable.language_{color:#ff7b72}.hljs-title,.hljs-title.class_,.hljs-title.class_.inherited__,.hljs-title.function_{color:#d2a8ff}.hljs-attr,.hljs-attribute,.hljs-literal,.hljs-meta,.hljs-number,.hljs-operator,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-variable{color:#79c0ff}.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#a5d6ff}.hljs-built_in,.hljs-symbol{color:#ffa657}.hljs-code,.hljs-comment,.hljs-formula{color:#8b949e}.hljs-name,.hljs-quote,.hljs-selector-pseudo,.hljs-selector-tag{color:#7ee787}.hljs-subst{color:#c9d1d9}.hljs-section{color:#1f6feb;font-weight:700}.hljs-bullet{color:#f2cc60}.hljs-emphasis{color:#c9d1d9;font-style:italic}.hljs-strong{color:#c9d1d9;font-weight:700}.hljs-addition{color:#aff5b4;background-color:#033a16}.hljs-deletion{color:#ffdcd7;background-color:#67060c} \ No newline at end of file +*/ +html body gradio-app .gradio-container .hljs { + color: #c9d1d9; + background: #0d1117 +} + +html body gradio-app .gradio-container .hljs-doctag, +html body gradio-app .gradio-container .hljs-keyword, +html body gradio-app .gradio-container .hljs-meta .hljs-keyword, +html body gradio-app .gradio-container .hljs-template-tag, +html body gradio-app .gradio-container .hljs-template-variable, +html body gradio-app .gradio-container .hljs-type, +html body gradio-app .gradio-container .hljs-variable.language_ { + color: #ff7b72 +} + +html body gradio-app .gradio-container .hljs-title, +html body gradio-app .gradio-container .hljs-title.class_, +html body gradio-app .gradio-container .hljs-title.class_.inherited__, +html body gradio-app .gradio-container .hljs-title.function_ { + color: #d2a8ff +} + +html body gradio-app .gradio-container .hljs-attr, +html body gradio-app .gradio-container .hljs-attribute, +html body gradio-app .gradio-container .hljs-literal, +html body gradio-app .gradio-container .hljs-meta, +html body gradio-app .gradio-container .hljs-number, +html body gradio-app .gradio-container .hljs-operator, +html body gradio-app .gradio-container .hljs-selector-attr, +html body gradio-app .gradio-container .hljs-selector-class, +html body gradio-app .gradio-container .hljs-selector-id, +html body gradio-app .gradio-container .hljs-variable { + color: #79c0ff +} + +html body gradio-app .gradio-container .hljs-meta .hljs-string, +html body gradio-app .gradio-container .hljs-regexp, +html body gradio-app .gradio-container .hljs-string { + color: #a5d6ff +} + +html body gradio-app .gradio-container .hljs-built_in, +html body gradio-app .gradio-container .hljs-symbol { + color: #ffa657 +} + +html body gradio-app .gradio-container .hljs-code, +html body gradio-app .gradio-container .hljs-comment, +html body gradio-app .gradio-container .hljs-formula { + color: #8b949e +} + 
+html body gradio-app .gradio-container .hljs-name, +html body gradio-app .gradio-container .hljs-quote, +html body gradio-app .gradio-container .hljs-selector-pseudo, +html body gradio-app .gradio-container .hljs-selector-tag { + color: #7ee787 +} + +html body gradio-app .gradio-container .hljs-subst { + color: #c9d1d9 +} + +html body gradio-app .gradio-container .hljs-section { + color: #1f6feb; + font-weight: 700 +} + +html body gradio-app .gradio-container .hljs-bullet { + color: #f2cc60 +} + +html body gradio-app .gradio-container .hljs-emphasis { + color: #c9d1d9; + font-style: italic +} + +html body gradio-app .gradio-container .hljs-strong { + color: #c9d1d9; + font-weight: 700 +} + +html body gradio-app .gradio-container .hljs-addition { + color: #aff5b4; + background-color: #033a16 +} + +html body gradio-app .gradio-container .hljs-deletion { + color: #ffdcd7; + background-color: #67060c +} diff --git a/css/highlightjs/github.min.css b/css/highlightjs/github.min.css index 96af2848..fbc58cca 100644 --- a/css/highlightjs/github.min.css +++ b/css/highlightjs/github.min.css @@ -1,4 +1,14 @@ -pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}/*! +html body gradio-app .gradio-container pre code.hljs { + display: block; + overflow-x: auto; + padding: 1em +} + +html body gradio-app .gradio-container code.hljs { + padding: 3px 5px +} + +/*! Theme: GitHub Description: Light theme as seen on github.com Author: github.com @@ -7,4 +17,95 @@ pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5p Outdated base version: https://github.com/primer/github-syntax-light Current colors taken from GitHub's CSS -*/.hljs{color:#24292e;background:#fff}.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-template-tag,.hljs-template-variable,.hljs-type,.hljs-variable.language_{color:#d73a49}.hljs-title,.hljs-title.class_,.hljs-title.class_.inherited__,.hljs-title.function_{color:#6f42c1}.hljs-attr,.hljs-attribute,.hljs-literal,.hljs-meta,.hljs-number,.hljs-operator,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-variable{color:#005cc5}.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#032f62}.hljs-built_in,.hljs-symbol{color:#e36209}.hljs-code,.hljs-comment,.hljs-formula{color:#6a737d}.hljs-name,.hljs-quote,.hljs-selector-pseudo,.hljs-selector-tag{color:#22863a}.hljs-subst{color:#24292e}.hljs-section{color:#005cc5;font-weight:700}.hljs-bullet{color:#735c0f}.hljs-emphasis{color:#24292e;font-style:italic}.hljs-strong{color:#24292e;font-weight:700}.hljs-addition{color:#22863a;background-color:#f0fff4}.hljs-deletion{color:#b31d28;background-color:#ffeef0} +*/ +html body gradio-app .gradio-container .hljs { + color: #24292e; + background: #fff +} + +html body gradio-app .gradio-container .hljs-doctag, +html body gradio-app .gradio-container .hljs-keyword, +html body gradio-app .gradio-container .hljs-meta .hljs-keyword, +html body gradio-app .gradio-container .hljs-template-tag, +html body gradio-app .gradio-container .hljs-template-variable, +html body gradio-app .gradio-container .hljs-type, +html body gradio-app .gradio-container .hljs-variable.language_ { + color: #d73a49 +} + +html body gradio-app .gradio-container .hljs-title, +html body gradio-app .gradio-container .hljs-title.class_, +html body gradio-app .gradio-container .hljs-title.class_.inherited__, +html body gradio-app .gradio-container .hljs-title.function_ { + color: #6f42c1 +} + +html body gradio-app .gradio-container .hljs-attr, +html body gradio-app 
.gradio-container .hljs-attribute, +html body gradio-app .gradio-container .hljs-literal, +html body gradio-app .gradio-container .hljs-meta, +html body gradio-app .gradio-container .hljs-number, +html body gradio-app .gradio-container .hljs-operator, +html body gradio-app .gradio-container .hljs-selector-attr, +html body gradio-app .gradio-container .hljs-selector-class, +html body gradio-app .gradio-container .hljs-selector-id, +html body gradio-app .gradio-container .hljs-variable { + color: #005cc5 +} + +html body gradio-app .gradio-container .hljs-meta .hljs-string, +html body gradio-app .gradio-container .hljs-regexp, +html body gradio-app .gradio-container .hljs-string { + color: #032f62 +} + +html body gradio-app .gradio-container .hljs-built_in, +html body gradio-app .gradio-container .hljs-symbol { + color: #e36209 +} + +html body gradio-app .gradio-container .hljs-code, +html body gradio-app .gradio-container .hljs-comment, +html body gradio-app .gradio-container .hljs-formula { + color: #6a737d +} + +html body gradio-app .gradio-container .hljs-name, +html body gradio-app .gradio-container .hljs-quote, +html body gradio-app .gradio-container .hljs-selector-pseudo, +html body gradio-app .gradio-container .hljs-selector-tag { + color: #22863a +} + +html body gradio-app .gradio-container .hljs-subst { + color: #24292e +} + +html body gradio-app .gradio-container .hljs-section { + color: #005cc5; + font-weight: 700 +} + +html body gradio-app .gradio-container .hljs-bullet { + color: #735c0f +} + +html body gradio-app .gradio-container .hljs-emphasis { + color: #24292e; + font-style: italic +} + +html body gradio-app .gradio-container .hljs-strong { + color: #24292e; + font-weight: 700 +} + +html body gradio-app .gradio-container .hljs-addition { + color: #22863a; + background-color: #f0fff4 +} + +html body gradio-app .gradio-container .hljs-deletion { + color: #b31d28; + background-color: #ffeef0 +} From f66ab63d64a7ce9dfa411fd150052355d56b5b70 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 14:06:34 -0700 Subject: [PATCH 34/91] Bump transformers to 4.43 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c3f4690..6b1d6247 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 7c9f4cda..b392089d 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cfe3a8e0..5aadd8c9 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a020387f..a166d4f6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy 
sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 9f59a487..45511a8f 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 6110eab6..d4913ac8 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index d4591919..b468adaf 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 8a486ef4..09ee3257 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 14e3aa88..bc8a59aa 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.42.* +transformers==4.43.* tqdm wandb From e6181e834ab0b32baa19a55773f369dc9a64802d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:26:02 -0700 Subject: [PATCH 35/91] Remove AutoAWQ as a standalone loader (it works better through transformers) --- modules/LoRA.py | 2 -- modules/loaders.py | 10 ---------- modules/models.py | 19 ------------------- modules/models_settings.py | 2 -- modules/shared.py | 9 ++------- modules/ui.py | 1 - modules/ui_model_menu.py | 1 - 7 files changed, 2 insertions(+), 42 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index eda5e406..117022cf 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -72,8 +72,6 @@ def add_lora_autogptq(lora_names): else: if len(lora_names) > 1: logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.') - if not shared.args.no_inject_fused_attention: - logger.warning('Fused Attention + AutoGPTQ may break Lora loading. 
Disable it.') peft_config = GPTQLoraConfig( inference_mode=True, diff --git a/modules/loaders.py b/modules/loaders.py index 75ed897b..549de5fb 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -127,15 +127,6 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'autogptq_info', ], - 'AutoAWQ': [ - 'cpu_memory', - 'gpu_memory', - 'auto_devices', - 'max_seq_len', - 'no_inject_fused_attention', - 'trust_remote_code', - 'no_use_fast', - ], 'HQQ': [ 'hqq_backend', 'trust_remote_code', @@ -200,7 +191,6 @@ def transformers_samplers(): loaders_samplers = { 'Transformers': transformers_samplers(), 'AutoGPTQ': transformers_samplers(), - 'AutoAWQ': transformers_samplers(), 'HQQ': transformers_samplers(), 'ExLlamav2': { 'temperature', diff --git a/modules/models.py b/modules/models.py index 07c14308..ea046e9b 100644 --- a/modules/models.py +++ b/modules/models.py @@ -75,7 +75,6 @@ def load_model(model_name, loader=None): 'llamacpp_HF': llamacpp_HF_loader, 'ExLlamav2': ExLlamav2_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, - 'AutoAWQ': AutoAWQ_loader, 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -292,24 +291,6 @@ def llamacpp_HF_loader(model_name): return model -def AutoAWQ_loader(model_name): - from awq import AutoAWQForCausalLM - - model_dir = Path(f'{shared.args.model_dir}/{model_name}') - - model = AutoAWQForCausalLM.from_quantized( - quant_path=model_dir, - max_new_tokens=shared.args.max_seq_len, - trust_remote_code=shared.args.trust_remote_code, - fuse_layers=not shared.args.no_inject_fused_attention, - max_memory=get_max_memory_dict(), - batch_size=1, - safetensors=any(model_dir.glob('*.safetensors')), - ) - - return model - - def AutoGPTQ_loader(model_name): import modules.AutoGPTQ_loader diff --git a/modules/models_settings.py b/modules/models_settings.py index 7ae68125..1bb00ceb 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -180,8 +180,6 @@ def infer_loader(model_name, model_settings): loader = None elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0): loader = 'ExLlamav2_HF' - elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()): - loader = 'AutoAWQ' elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): loader = 'llamacpp_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0: diff --git a/modules/shared.py b/modules/shared.py index dec427dd..fe09a165 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -89,7 +89,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ, AutoAWQ.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') @@ -160,10 +160,6 @@ group.add_argument('--disable_exllamav2', action='store_true', help='Disable ExL group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 
2, 3, 4 and 8 are supported.') group.add_argument('--groupsize', type=int, default=-1, help='Group size.') -# AutoAWQ -group = parser.add_argument_group('AutoAWQ') -group.add_argument('--no_inject_fused_attention', action='store_true', help='Disable the use of fused attention, which will use less VRAM at the cost of slower inference.') - # HQQ group = parser.add_argument_group('HQQ') group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.') @@ -217,6 +213,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED') group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') group.add_argument('--checkpoint', type=str, help='DEPRECATED') group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') +group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) @@ -267,8 +264,6 @@ def fix_loader_name(name): return 'ExLlamav2' elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']: return 'ExLlamav2_HF' - elif name in ['autoawq', 'awq', 'auto-awq']: - return 'AutoAWQ' elif name in ['hqq']: return 'HQQ' elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: diff --git a/modules/ui.py b/modules/ui.py index cfe709fa..47f92cf0 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -78,7 +78,6 @@ def list_model_elements(): 'groupsize', 'triton', 'desc_act', - 'no_inject_fused_attention', 'no_inject_fused_mlp', 'no_use_cuda_fp16', 'disable_exllama', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 54ac9b12..2938c120 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -127,7 +127,6 @@ def create_ui(): shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.') shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.') shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) - shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.') shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. 
Disable if running low on VRAM.') shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') From 1815877061e87c9926079d79f85af12612be5d33 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:48:10 -0700 Subject: [PATCH 36/91] UI: fix the default character not loading correctly on startup --- modules/ui_chat.py | 2 +- server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 7085f5cd..8b370d86 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -84,7 +84,7 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') + shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') with gr.Row(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') diff --git a/server.py b/server.py index 57e26be8..d6069d5e 100644 --- a/server.py +++ b/server.py @@ -90,7 +90,7 @@ def create_interface(): # Force some events to be triggered on page load shared.persistent_interface_state.update({ 'loader': shared.args.loader or 'Transformers', - 'mode': shared.settings['mode'], + 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(), 'character_menu': shared.args.character or shared.settings['character'], 'instruction_template_str': shared.settings['instruction_template_str'], 'prompt_menu-default': shared.settings['prompt-default'], From e777b7334943e6d16e71750c1da1beb536bb153a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:04:19 -0700 Subject: [PATCH 37/91] UI: prevent LaTeX from being rendered for inline "$" --- js/main.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/js/main.js b/js/main.js index bdbb7cef..3b8b13e4 100644 --- a/js/main.js +++ b/js/main.js @@ -213,12 +213,10 @@ function doSyntaxHighlighting() { renderMathInElement(element, { delimiters: [ { left: "$$", right: "$$", display: true }, - { left: "$", right: "$", display: false }, { left: "\\(", right: "\\)", display: false }, { left: "\\[", right: "\\]", display: true }, ], }); - }); observer.observe(targetElement, config); From 8b52b93e8566a7268eb487ba7d81def9e9227ded Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:35:00 -0700 Subject: [PATCH 38/91] Make the Google Colab notebook functional again (attempt) --- 
Colab-TextGen-GPU.ipynb | 49 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 82e6c18e..739232a4 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -22,7 +22,7 @@ "source": [ "# oobabooga/text-generation-webui\n", "\n", - "After running both cells, a public gradio URL will appear at the bottom in a few minutes. You can optionally generate an API link.\n", + "After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n", "\n", "* Project page: https://github.com/oobabooga/text-generation-webui\n", "* Gradio server status: https://status.gradio.app/" @@ -53,43 +53,27 @@ "\n", "#@markdown If unsure about the branch, write \"main\" or leave it blank.\n", "\n", - "import torch\n", + "import os\n", "from pathlib import Path\n", "\n", + "os.environ.pop('PYTHONPATH', None)\n", + "\n", "if Path.cwd().name != 'text-generation-webui':\n", - " print(\"Installing the webui...\")\n", + " print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n", "\n", " !git clone https://github.com/oobabooga/text-generation-webui\n", " %cd text-generation-webui\n", "\n", - " torver = torch.__version__\n", - " print(f\"TORCH: {torver}\")\n", - " is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n", - "\n", - " if is_cuda118:\n", - " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n", - " else:\n", - " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n", - "\n", - " textgen_requirements = open('requirements.txt').read().splitlines()\n", - " if is_cuda118:\n", - " textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n", - " with open('temp_requirements.txt', 'w') as file:\n", - " file.write('\\n'.join(textgen_requirements))\n", - "\n", - " !pip install -r temp_requirements.txt --upgrade\n", - "\n", - " print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n", - " print(\"\\033[1;32;1m\\n --> There is no need to restart the runtime.\\n\\033[0;37;0m\")\n", - "\n", - " try:\n", - " import flash_attn\n", - " except:\n", - " !pip uninstall -y flash_attn\n", + " # Install the project in an isolated environment\n", + " !GPU_CHOICE=A \\\n", + " USE_CUDA118=FALSE \\\n", + " LAUNCH_AFTER_INSTALL=FALSE \\\n", + " INSTALL_EXTENSIONS=FALSE \\\n", + " ./start_linux.sh\n", "\n", "# Parameters\n", - "model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n", - "branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n", + "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", + "branch = \"8.0bpw\" #@param {type:\"string\"}\n", "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", @@ -116,11 +100,10 @@ " output_folder = \"\"\n", "\n", "# Start the web UI\n", - "cmd = f\"python server.py --share\"\n", + "cmd = f\"./start_linux.sh {command_line_flags} --share\"\n", "if output_folder != \"\":\n", " cmd += f\" --model 
{output_folder}\"\n", - "cmd += f\" {command_line_flags}\"\n", - "print(cmd)\n", + "\n", "!$cmd" ], "metadata": { @@ -131,4 +114,4 @@ "outputs": [] } ] -} +} \ No newline at end of file From 9d5513fda0f5a78db0fc03262c6b3fdc72e166b0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:38:04 -0700 Subject: [PATCH 39/91] Remove the AutoAWQ requirement --- requirements.txt | 1 - requirements_amd.txt | 2 -- requirements_amd_noavx2.txt | 2 -- requirements_noavx2.txt | 1 - 4 files changed, 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6b1d6247..db73b8b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" diff --git a/requirements_amd.txt b/requirements_amd.txt index b392089d..600db9b4 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,5 +43,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5aadd8c9..4f148c94 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,5 +41,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 09ee3257..603fb0b8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" From af839d20acee2710022d735d2d1dc9cb48696f73 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:38:04 -0700 Subject: [PATCH 40/91] Remove the AutoAWQ requirement --- requirements.txt | 1 - requirements_amd.txt | 2 -- requirements_amd_noavx2.txt | 2 -- requirements_noavx2.txt | 1 - 4 files changed, 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c3f4690..1f952872 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" diff --git a/requirements_amd.txt b/requirements_amd.txt index 7c9f4cda..e85e3e0c 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,5 +43,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cfe3a8e0..35c9a485 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,5 +41,3 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 8a486ef4..2249b0a8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,4 +62,3 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows" From 98ed6d3a666e3924410d34568b3e1919709656a2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:50:56 -0700 Subject: [PATCH 41/91] Don't use flash attention on Google Colab --- Colab-TextGen-GPU.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 739232a4..8e305e1d 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -74,7 +74,7 @@ "# Parameters\n", "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n", "branch = \"8.0bpw\" #@param 
{type:\"string\"}\n", - "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n", + "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n", "api = False #@param {type:\"boolean\"}\n", "\n", "if api:\n", @@ -114,4 +114,4 @@ "outputs": [] } ] -} \ No newline at end of file +} From e637b702ff9d6955e830fe96b94bf5313e9f2703 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 23 Jul 2024 21:29:30 -0700 Subject: [PATCH 42/91] UI: make text between quotes colored in chat mode --- css/main.css | 12 ++++++++++++ modules/chat.py | 2 +- modules/html_generator.py | 26 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index d8e12e59..6f2a9fb7 100644 --- a/css/main.css +++ b/css/main.css @@ -406,6 +406,18 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: var(--body-text-color); } +.message q { + color: #707070; +} + +.dark .message q { + color: orange; +} + +.message q::before, .message q::after { + content: ""; +} + .message-body li { list-style-position: outside; } diff --git a/modules/chat.py b/modules/chat.py index c95673ce..9919cb76 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -488,7 +488,7 @@ def start_new_chat(state): greeting = replace_character_names(state['greeting'], state['name1'], state['name2']) if greeting != '': history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]] - history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]] + history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]] unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') save_history(history, unique_id, state['character_menu'], state['mode']) diff --git a/modules/html_generator.py b/modules/html_generator.py index 657133bd..61e61b0f 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -42,6 +42,29 @@ def fix_newlines(string): return string +def replace_quotes(text): + + # Define a list of quote pairs (opening and closing), using HTML entities + quote_pairs = [ + ('"', '"'), # Double quotes + ('“', '”'), # Unicode left and right double quotation marks + ('‘', '’'), # Unicode left and right single quotation marks + ('«', '»'), # French quotes + ('„', '“'), # German quotes + ('‘', '’'), # Alternative single quotes + ('“', '”'), # Unicode quotes (numeric entities) + ('“', '”'), # Unicode quotes (hex entities) + ] + + # Create a regex pattern that matches any of the quote pairs, including newlines + pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs) + + # Replace matched patterns with tags, keeping original quotes + replaced_text = re.sub(pattern, lambda m: f'{m.group(1)}{m.group(2)}{m.group(3)}', text, flags=re.DOTALL) + + return replaced_text + + def replace_blockquote(m): return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') @@ -49,6 +72,9 @@ def replace_blockquote(m): @functools.lru_cache(maxsize=4096) def convert_to_markdown(string): + # Quote to + string = replace_quotes(string) + # Blockquote string = re.sub(r'(^|[\n])>', r'\1>', string) pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL) From 8a5f110c14f4ce4810c8bfd1a3fa8080935a61ba Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 
2024 09:22:48 -0700 Subject: [PATCH 43/91] Bump ExLlamaV2 to 0.1.8 --- requirements.txt | 10 +++++----- requirements_amd.txt | 6 +++--- requirements_amd_noavx2.txt | 6 +++--- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_noavx2.txt | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/requirements.txt b/requirements.txt index db73b8b1..bc41421c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 600db9b4..df3ab7fb 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -40,6 +40,6 @@ 
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 4f148c94..e85d1262 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -38,6 +38,6 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index a166d4f6..123b6d9b 100644 
--- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 45511a8f..08509b05 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -38,4 +38,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 603fb0b8..28d13a90 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 3b2c23dfb568d011276d5e2ec7ffc2596ce25580 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:15:00 -0700 Subject: [PATCH 44/91] Add AutoAWQ 0.2.6 wheels for PyTorch 2.2.2 --- requirements.txt | 8 ++++++++ requirements_amd.txt | 4 ++++ requirements_amd_noavx2.txt | 4 ++++ requirements_noavx2.txt | 8 ++++++++ 4 files changed, 24 insertions(+) diff --git a/requirements.txt b/requirements.txt index bc41421c..4461c9cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,3 +62,11 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system 
== "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index df3ab7fb..48604f70 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -43,3 +43,7 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e85d1262..dcfaa5df 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -41,3 +41,7 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 28d13a90..4756d844 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -62,3 +62,11 @@ https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 947016d01092342fa07b150979d1e13efa45d975 Mon Sep 17 00:00:00 2001 From: oobabooga 
<112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:54:26 -0700 Subject: [PATCH 45/91] UI: make the markdown LRU cache infinite (for really long conversations) --- modules/chat.py | 19 +++++++++++++++++-- modules/html_generator.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 9919cb76..c744defc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -17,7 +17,11 @@ from PIL import Image import modules.shared as shared from modules import utils from modules.extensions import apply_extensions -from modules.html_generator import chat_html_wrapper, make_thumbnail +from modules.html_generator import ( + chat_html_wrapper, + convert_to_markdown, + make_thumbnail +) from modules.logging_colors import logger from modules.text_generation import ( generate_reply, @@ -368,7 +372,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess def impersonate_wrapper(text, state): - static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) prompt = generate_chat_prompt('', state, impersonate=True) @@ -1044,6 +1047,8 @@ def handle_unique_id_select(state): history = load_history(state['unique_id'], state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [history, html] @@ -1052,6 +1057,8 @@ def handle_start_new_chat_click(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [history, html, gr.update(choices=histories, value=histories[0][1])] @@ -1061,6 +1068,8 @@ def handle_delete_chat_confirm_click(state): history, unique_id = load_history_after_deletion(state, index) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1099,6 +1108,8 @@ def handle_upload_chat_history(load_chat_history, state): html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1119,6 +1130,8 @@ def handle_character_menu_change(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, @@ -1136,6 +1149,8 @@ def handle_mode_change(state): histories = find_all_histories_with_first_prompts(state) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + convert_to_markdown.cache_clear() + return [ history, html, diff --git a/modules/html_generator.py b/modules/html_generator.py index 61e61b0f..1b687ade 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -69,7 +69,7 @@ def replace_blockquote(m): return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') -@functools.lru_cache(maxsize=4096) +@functools.lru_cache(maxsize=None) def convert_to_markdown(string): # Quote to From 7e2851e5058bf2a4569a7d374450ab549bcf2886 Mon Sep 17 00:00:00 2001 From: 
oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:04:12 -0700 Subject: [PATCH 46/91] UI: fix "Command for chat-instruct mode" not appearing by default --- modules/ui_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 8b370d86..7ef8df4d 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -90,7 +90,7 @@ def create_ui(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') with gr.Row(): - shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar']) + shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar']) def create_chat_settings_ui(): From 3170b6efc9e8e6189712c87e2dcf5723c89bb8ed Mon Sep 17 00:00:00 2001 From: Luana Date: Wed, 24 Jul 2024 22:23:29 -0300 Subject: [PATCH 47/91] Fixes Linux shebangs (#6110) --- cmd_linux.sh | 2 +- start_linux.sh | 2 +- update_wizard_linux.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd_linux.sh b/cmd_linux.sh index 1685050a..576dbf02 100755 --- a/cmd_linux.sh +++ b/cmd_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" diff --git a/start_linux.sh b/start_linux.sh index 5620c831..792daca8 100755 --- a/start_linux.sh +++ b/start_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh index c5add61e..3ada9a1e 100755 --- a/update_wizard_linux.sh +++ b/update_wizard_linux.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd "$(dirname "${BASH_SOURCE[0]}")" From 1f101ee3e5d0516ea17b905128b6b76b8d2b0f23 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 18:56:54 -0700 Subject: [PATCH 48/91] UI: improve the quote colors --- css/main.css | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/css/main.css b/css/main.css index 6f2a9fb7..3ecf0044 100644 --- a/css/main.css +++ b/css/main.css @@ -406,12 +406,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: var(--body-text-color); } -.message q { - color: #707070; -} - .dark .message q { - color: orange; + color: #f5b031; } .message q::before, .message q::after { From ac30e7fe9c0ef0a03c7efc268f3300a4c6963ca4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:03:34 -0700 Subject: [PATCH 49/91] Updater: don't reinstall requirements if no updates after git pull --- one_click.py | 54 +++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/one_click.py b/one_click.py index e94b6d44..547f4df4 100644 --- a/one_click.py +++ b/one_click.py @@ -337,6 +337,7 @@ def update_requirements(initial_installation=False, pull=True): git_creation_cmd = 'git init -b main && git remote add origin 
https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) + repository_updated = False if pull: print_big_message("Updating the local copy of the repository with \"git pull\"") @@ -347,9 +348,13 @@ def update_requirements(initial_installation=False, pull=True): ] before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - run_cmd("git pull --autostash", assert_success=True, environment=True) + pull_output = run_cmd("git pull --autostash", assert_success=True, environment=True, capture_output=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} + # Check if git pull actually updated anything + if "Already up to date." not in pull_output: + repository_updated = True + # Check for differences in installation file hashes for file_name in files_to_check: if before_pull_hashes[file_name] != after_pull_hashes[file_name]: @@ -382,39 +387,40 @@ def update_requirements(initial_installation=False, pull=True): requirements_file = base_requirements - print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + if repository_updated or initial_installation: + print_big_message(f"Installing webui requirements from file: {requirements_file}") + print(f"TORCH: {torver}\n") - # Prepare the requirements file - textgen_requirements = open(requirements_file).read().splitlines() - if is_cuda118: - textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] + # Prepare the requirements file + textgen_requirements = open(requirements_file).read().splitlines() - with open('temp_requirements.txt', 'w') as file: - file.write('\n'.join(textgen_requirements)) + if is_cuda118: + textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] + if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - # Workaround for git+ packages not updating properly. - git_requirements = [req for req in textgen_requirements if req.startswith("git+")] - for req in git_requirements: - url = req.replace("git+", "") - package_name = url.split("/")[-1].split("@")[0].rstrip(".git") - run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) - print(f"Uninstalled {package_name}") + with open('temp_requirements.txt', 'w') as file: + file.write('\n'.join(textgen_requirements)) - # Install/update the project requirements - run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) - os.remove('temp_requirements.txt') + # Workaround for git+ packages not updating properly. 
+ git_requirements = [req for req in textgen_requirements if req.startswith("git+")] + for req in git_requirements: + url = req.replace("git+", "") + package_name = url.split("/")[-1].split("@")[0].rstrip(".git") + run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) + print(f"Uninstalled {package_name}") + + # Install/update the project requirements + run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + os.remove('temp_requirements.txt') + else: + print("Repository is already up to date. Skipping requirements installation.") # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: clear_cache() return - if not os.path.exists("repositories/"): - os.mkdir("repositories") - clear_cache() From b85ae6bc96ff03aca12f8ad6212da39ec9cca192 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 19:10:17 -0700 Subject: [PATCH 50/91] Fix after previous commit --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 547f4df4..d823ecb3 100644 --- a/one_click.py +++ b/one_click.py @@ -352,7 +352,7 @@ def update_requirements(initial_installation=False, pull=True): after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} # Check if git pull actually updated anything - if "Already up to date." not in pull_output: + if "Already up to date." not in pull_output.stdout.decode('utf-8'): repository_updated = True # Check for differences in installation file hashes From 14584fda366b8f38d02e31fbb511c0527438c078 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 24 Jul 2024 20:55:18 -0700 Subject: [PATCH 51/91] UI: don't change the color of italics in instruct mode --- css/html_instruct_style.css | 8 -------- 1 file changed, 8 deletions(-) diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css index 8a31d6e2..50b9402f 100644 --- a/css/html_instruct_style.css +++ b/css/html_instruct_style.css @@ -39,14 +39,6 @@ margin-bottom: 0 !important; } -.dark .message-body p em { - color: rgb(198 202 214) !important; -} - -.message-body p em { - color: rgb(110 110 110) !important; -} - .gradio-container .chat .assistant-message { padding: 20px; background: #f4f4f4; From d581334a41b0ddd2e9701aea4643ba6206f2edff Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 05:38:52 -0700 Subject: [PATCH 52/91] Don't install AutoAWQ on CUDA 11.8 --- one_click.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index d823ecb3..065265fe 100644 --- a/one_click.py +++ b/one_click.py @@ -395,7 +395,11 @@ def update_requirements(initial_installation=False, pull=True): textgen_requirements = open(requirements_file).read().splitlines() if is_cuda118: - textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req] + textgen_requirements = [ + req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') + for req in textgen_requirements + if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() + ] if is_windows() and is_cuda118: # 
No flash-attention on Windows for CUDA 11 textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] From a34273755b7389adb14ee0a04d5d6345b0b00dcb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 07:34:01 -0700 Subject: [PATCH 53/91] Revert "Updater: don't reinstall requirements if no updates after git pull" This reverts commit ac30e7fe9c0ef0a03c7efc268f3300a4c6963ca4. --- one_click.py | 61 ++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/one_click.py b/one_click.py index 065265fe..0a0412ba 100644 --- a/one_click.py +++ b/one_click.py @@ -337,7 +337,6 @@ def update_requirements(initial_installation=False, pull=True): git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) - repository_updated = False if pull: print_big_message("Updating the local copy of the repository with \"git pull\"") @@ -348,13 +347,9 @@ def update_requirements(initial_installation=False, pull=True): ] before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - pull_output = run_cmd("git pull --autostash", assert_success=True, environment=True, capture_output=True) + run_cmd("git pull --autostash", assert_success=True, environment=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - # Check if git pull actually updated anything - if "Already up to date." 
not in pull_output.stdout.decode('utf-8'): - repository_updated = True - # Check for differences in installation file hashes for file_name in files_to_check: if before_pull_hashes[file_name] != after_pull_hashes[file_name]: @@ -387,44 +382,44 @@ def update_requirements(initial_installation=False, pull=True): requirements_file = base_requirements - if repository_updated or initial_installation: - print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + print_big_message(f"Installing webui requirements from file: {requirements_file}") + print(f"TORCH: {torver}\n") - # Prepare the requirements file - textgen_requirements = open(requirements_file).read().splitlines() + # Prepare the requirements file + textgen_requirements = open(requirements_file).read().splitlines() + if is_cuda118: + textgen_requirements = [ + req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') + for req in textgen_requirements + if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() + ] - if is_cuda118: - textgen_requirements = [ - req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') - for req in textgen_requirements - if "auto-gptq" not in req.lower() and "autoawq" not in req.lower() - ] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] + if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - with open('temp_requirements.txt', 'w') as file: - file.write('\n'.join(textgen_requirements)) + with open('temp_requirements.txt', 'w') as file: + file.write('\n'.join(textgen_requirements)) - # Workaround for git+ packages not updating properly. - git_requirements = [req for req in textgen_requirements if req.startswith("git+")] - for req in git_requirements: - url = req.replace("git+", "") - package_name = url.split("/")[-1].split("@")[0].rstrip(".git") - run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) - print(f"Uninstalled {package_name}") + # Workaround for git+ packages not updating properly. + git_requirements = [req for req in textgen_requirements if req.startswith("git+")] + for req in git_requirements: + url = req.replace("git+", "") + package_name = url.split("/")[-1].split("@")[0].rstrip(".git") + run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) + print(f"Uninstalled {package_name}") - # Install/update the project requirements - run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) - os.remove('temp_requirements.txt') - else: - print("Repository is already up to date. Skipping requirements installation.") + # Install/update the project requirements + run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + os.remove('temp_requirements.txt') # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. 
Check for pytorch-cuda as well for backwards compatibility if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: clear_cache() return + if not os.path.exists("repositories/"): + os.mkdir("repositories") + clear_cache() From 42e80108f57830f14c2a20884371b7df2d60dd60 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:01:42 -0700 Subject: [PATCH 54/91] UI: clear the markdown LRU cache when using the default/notebook tabs --- modules/html_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/html_generator.py b/modules/html_generator.py index 1b687ade..c5eba5a8 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -150,6 +150,7 @@ def convert_to_markdown_wrapped(string, use_cache=True): def generate_basic_html(string): + convert_to_markdown.cache_clear() string = convert_to_markdown(string) string = f'
<style>{readable_css}</style><div class="readable-container">{string}</div>
' return string From b80d5906c238a00dcca6177f3ad8a1e25d12e653 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:09:31 -0700 Subject: [PATCH 55/91] UI: fix saving characters --- modules/ui_file_saving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 51fac69f..ac72c623 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -107,7 +107,7 @@ def handle_save_character_confirm_click(name2, greeting, context, character_pict try: chat.save_character(name2, greeting, context, character_picture, filename) available_characters = utils.get_available_characters() - output = gr.update(choices=available_characters, value=filename), + output = gr.update(choices=available_characters, value=filename) except Exception: output = gr.update() traceback.print_exc() From c5814db17337d8512ea2b488b2252c0cb7240820 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:22:07 -0700 Subject: [PATCH 56/91] UI: fix double quotes in instruct mode --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 3ecf0044..b10012a3 100644 --- a/css/main.css +++ b/css/main.css @@ -410,7 +410,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { color: #f5b031; } -.message q::before, .message q::after { +.message-body q::before, .message-body q::after { content: ""; } From 9e82f8c3943511b27fcbdbf317bc1b68623d3071 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 26 Jul 2024 22:35:30 -0700 Subject: [PATCH 57/91] UI: Fix chat sometimes not scrolling down after sending a message --- js/main.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index 3b8b13e4..d5287482 100644 --- a/js/main.js +++ b/js/main.js @@ -459,7 +459,12 @@ function updateCssProperties() { // Adjust scrollTop based on input height change if (chatInputHeight !== currentChatInputHeight) { - chatContainer.scrollTop += chatInputHeight - currentChatInputHeight; + if (!isScrolled && chatInputHeight < currentChatInputHeight) { + chatContainer.scrollTop = chatContainer.scrollHeight; + } else { + chatContainer.scrollTop += chatInputHeight - currentChatInputHeight; + } + currentChatInputHeight = chatInputHeight; } } From f32d26240da55daba074a9bede27675d2ce9f497 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 26 Jul 2024 23:03:05 -0700 Subject: [PATCH 58/91] UI: Fix the chat "stop" event --- modules/chat.py | 10 +--------- modules/ui_chat.py | 5 +++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index c744defc..18fee9fa 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -26,8 +26,7 @@ from modules.logging_colors import logger from modules.text_generation import ( generate_reply, get_encoded_length, - get_max_prompt_length, - stop_everything_event + get_max_prompt_length ) from modules.utils import delete_file, get_available_characters, save_file @@ -1036,13 +1035,6 @@ def handle_remove_last_click(state): return [history, html, last_input] -def handle_stop_click(state): - stop_everything_event() - html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - - return html - - def handle_unique_id_select(state): history = 
load_history(state['unique_id'], state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 7ef8df4d..cce1cc49 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -7,6 +7,7 @@ from PIL import Image from modules import chat, shared, ui, utils from modules.html_generator import chat_html_wrapper +from modules.text_generation import stop_everything_event from modules.utils import gradio inputs = ('Chat input', 'interface_state') @@ -221,8 +222,8 @@ def create_event_handlers(): chat.handle_remove_last_click, gradio('interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) shared.gradio['Stop'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_stop_click, gradio('interface_state'), gradio('display'), show_progress=False) + stop_everything_event, None, None, queue=False).then( + chat.redraw_html, gradio(reload_arr), gradio('display')) if not shared.args.multi_user: shared.gradio['unique_id'].select( From 6bab4c2faad9f67154449b850576fed266e6b026 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 26 Jul 2024 23:03:53 -0700 Subject: [PATCH 59/91] UI: add back single $ for equations --- js/main.js | 1 + 1 file changed, 1 insertion(+) diff --git a/js/main.js b/js/main.js index d5287482..899bd8f0 100644 --- a/js/main.js +++ b/js/main.js @@ -213,6 +213,7 @@ function doSyntaxHighlighting() { renderMathInElement(element, { delimiters: [ { left: "$$", right: "$$", display: true }, + { left: "$", right: "$", display: false }, { left: "\\(", right: "\\)", display: false }, { left: "\\[", right: "\\]", display: true }, ], From e4d411b8412f13e8f2bc9e33c9257f94eb9efc7d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 27 Jul 2024 15:21:44 -0700 Subject: [PATCH 60/91] UI: fix rendering LaTeX enclosed between \[ and \] --- modules/html_generator.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/html_generator.py b/modules/html_generator.py index c5eba5a8..b3152008 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -72,6 +72,14 @@ def replace_blockquote(m): @functools.lru_cache(maxsize=None) def convert_to_markdown(string): + # Make \[ \] LaTeX equations inline + pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$' + replacement = r'\\[ \1 \\]' + string = re.sub(pattern, replacement, string, flags=re.MULTILINE) + + # Escape backslashes + string = string.replace('\\', '\\\\') + # Quote to string = replace_quotes(string) From 493f8c324208ca04499d27dff179eff4d34ecb76 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 27 Jul 2024 15:22:34 -0700 Subject: [PATCH 61/91] UI: remove animation after clicking on "Stop" in the Chat tab --- modules/ui_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index cce1cc49..57143cd8 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -223,7 +223,7 @@ def create_event_handlers(): shared.gradio['Stop'].click( stop_everything_event, None, None, queue=False).then( - chat.redraw_html, gradio(reload_arr), gradio('display')) + chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False) if not shared.args.multi_user: shared.gradio['unique_id'].select( From 
ffc713f72bae9a44c1c48416bc4e893b7611195c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 27 Jul 2024 15:33:30 -0700 Subject: [PATCH 62/91] UI: fix multiline LaTeX equations --- modules/html_generator.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/modules/html_generator.py b/modules/html_generator.py index b3152008..d0afd6b2 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -103,12 +103,27 @@ def convert_to_markdown(string): result = '' is_code = False + is_latex = False for line in string.split('\n'): - if line.lstrip(' ').startswith('```'): + stripped_line = line.strip() + + if stripped_line.startswith('```'): is_code = not is_code + elif stripped_line.startswith('$$'): + is_latex = not is_latex + elif stripped_line.endswith('$$'): + is_latex = False + elif stripped_line.startswith('\\\\['): + is_latex = True + elif stripped_line.startswith('\\\\]'): + is_latex = False + elif stripped_line.endswith('\\\\]'): + is_latex = False result += line - if is_code or line.startswith('|'): # Don't add an extra \n for tables or code + + # Don't add an extra \n for tables, code, or LaTeX + if is_code or is_latex or line.startswith('|'): result += '\n' else: result += '\n\n' From 078e8c8969a7f074dbffc84fbcda23d3ef15f4b5 Mon Sep 17 00:00:00 2001 From: Harry <20436268+hocjordan@users.noreply.github.com> Date: Sun, 28 Jul 2024 15:03:19 +0900 Subject: [PATCH 63/91] Make compress_pos_emb float (#6276) --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 2938c120..7b9cd1c7 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -105,7 +105,7 @@ def create_ui(): with gr.Blocks(): shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.') shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.') - shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=0, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.') + shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). 
Equal to 1/rope_freq_scale.') shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.') From 7050bb880e5ba74323abb4d4c01c070614eb31fd Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 27 Jul 2024 23:11:53 -0700 Subject: [PATCH 64/91] UI: make n_ctx/max_seq_len/truncation_length numbers rather than sliders --- modules/shared.py | 2 -- modules/training.py | 2 +- modules/ui_model_menu.py | 4 ++-- modules/ui_parameters.py | 2 +- settings-template.yaml | 2 -- 5 files changed, 4 insertions(+), 8 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index fe09a165..2aebb7c6 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -44,8 +44,6 @@ settings = { 'negative_prompt': '', 'seed': -1, 'truncation_length': 2048, - 'truncation_length_min': 0, - 'truncation_length_max': 200000, 'max_tokens_second': 0, 'max_updates_second': 0, 'prompt_lookup_num_tokens': 0, diff --git a/modules/training.py b/modules/training.py index a810fb6e..58d1eb9f 100644 --- a/modules/training.py +++ b/modules/training.py @@ -165,7 +165,7 @@ def create_ui(): stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.') with gr.Column(): - max_length = gr.Slider(label='max_length', minimum=0, maximum=shared.settings['truncation_length_max'], value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.') + max_length = gr.Number(label='max_length', precision=0, value=0, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.') with gr.Row(): start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 7b9cd1c7..1883fdca 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -93,7 +93,7 @@ def create_ui(): shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.') - shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.') + shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.') shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. 
Example: 60,40') shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch) shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads) @@ -101,7 +101,7 @@ def create_ui(): shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None") shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None") shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') - shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len) + shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. Try lowering this if you run out of memory while loading the model.') with gr.Blocks(): shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.') shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.') diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index cd4288b3..234e1af2 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -89,7 +89,7 @@ def create_ui(default_preset): shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.') with gr.Column(): - shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') + shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. 
Most models require this to be at most 2048.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') diff --git a/settings-template.yaml b/settings-template.yaml index 8d6e8dd8..59c76c35 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -12,8 +12,6 @@ max_new_tokens_max: 4096 negative_prompt: '' seed: -1 truncation_length: 2048 -truncation_length_min: 0 -truncation_length_max: 200000 max_tokens_second: 0 max_updates_second: 0 prompt_lookup_num_tokens: 0 From 5223c009fef58ba70fd22daeb3db8367e4f2f6eb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 27 Jul 2024 23:13:34 -0700 Subject: [PATCH 65/91] Minor change after previous commit --- modules/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/training.py b/modules/training.py index 58d1eb9f..b003fc8c 100644 --- a/modules/training.py +++ b/modules/training.py @@ -165,7 +165,7 @@ def create_ui(): stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.') with gr.Column(): - max_length = gr.Number(label='max_length', precision=0, value=0, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.') + max_length = gr.Number(label='max_length', precision=0, step=256, value=0, info='The context for each evaluation. 
If set to 0, the maximum context length for the model will be used.') with gr.Row(): start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu) From 92ab3a9a6a35d8dabd3ed12a13a033bb11eec622 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 28 Jul 2024 15:13:06 -0700 Subject: [PATCH 66/91] Bump llama-cpp-python to 0.2.84 --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index d6e755a0..2dc65a6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 9c34c95d..f72e8dfc 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python 
(CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.83+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.84+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.84+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 5df8f2ba..a1cd56db 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # 
llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 123b6d9b..5bcb22a3 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 08509b05..3865fc84 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.83-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version 
== "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index d4913ac8..71380007 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index b468adaf..dc9a5cb0 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1bbd4091..1c719fff 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.83+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" 
and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.83+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 3aa646c1d0774c3e88bd5ec4fbb1abc3a6726423 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 28 Jul 2024 15:26:15 -0700 Subject: [PATCH 67/91] UI: improve the style of headers in chat messages --- css/main.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/main.css b/css/main.css index b10012a3..cf3babdb 100644 --- a/css/main.css +++ b/css/main.css @@ -404,6 +404,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .message-body h3, .message-body h4 { color: var(--body-text-color); + margin: 20px 0 10px 0; } .dark .message q { From 514fb2e451b88cddfc73df38d5052ce865f0b9a4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 28 Jul 2024 18:30:06 -0700 Subject: [PATCH 68/91] Fix UI error caused by --idle-timeout --- modules/models.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/models.py b/modules/models.py index ea046e9b..ff990ae6 100644 --- a/modules/models.py +++ b/modules/models.py @@ -368,14 +368,16 @@ def clear_torch_cache(): torch.cuda.empty_cache() -def unload_model(): +def unload_model(keep_model_name=False): shared.model = shared.tokenizer = None shared.previous_model_name = shared.model_name - shared.model_name = 'None' shared.lora_names = [] shared.model_dirty_from_training = False clear_torch_cache() + if not keep_model_name: + shared.model_name = 'None' + def reload_model(): unload_model() @@ -393,7 +395,7 @@ def unload_model_if_idle(): if time.time() - last_generation_time > shared.args.idle_timeout * 60: if shared.model is not None: logger.info("Unloading the model for inactivity.") - unload_model() + unload_model(keep_model_name=True) finally: shared.generation_lock.release() From addcb52c5697fd612cd43bcb731cd806995b817c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 28 Jul 2024 18:31:40 -0700 Subject: [PATCH 69/91] Make --idle-timeout work for API requests --- extensions/openai/completions.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 44c1df86..646dee2d 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -319,7 +319,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p yield {'prompt': prompt} return - token_count = len(encode(prompt)[0]) debug_msg({'prompt': prompt, 'generate_params': generate_params}) if stream: @@ -330,7 +329,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p answer = '' seen_content = '' - completion_token_count = 0 for a in generator: answer = a['internal'][-1][1] @@ -345,6 +343,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p chunk = chat_streaming_chunk(new_content) yield chunk + token_count = len(encode(prompt)[0]) completion_token_count = len(encode(answer)[0]) stop_reason = "stop" if token_count + completion_token_count >= generate_params['truncation_length'] or 
completion_token_count >= generate_params['max_new_tokens']: @@ -429,8 +428,6 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False): prompt = decode(prompt)[0] prefix = prompt if echo else '' - token_count = len(encode(prompt)[0]) - total_prompt_token_count += token_count # generate reply ####################################### debug_msg({'prompt': prompt, 'generate_params': generate_params}) @@ -440,6 +437,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False): for a in generator: answer = a + token_count = len(encode(prompt)[0]) + total_prompt_token_count += token_count completion_token_count = len(encode(answer)[0]) total_completion_token_count += completion_token_count stop_reason = "stop" From 9dcff21da9f433e5fb9237bd38d8c19429bab884 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 28 Jul 2024 18:35:11 -0700 Subject: [PATCH 70/91] Remove unnecessary shared.previous_model_name variable --- modules/logits.py | 4 ++-- modules/models.py | 1 - modules/shared.py | 1 - modules/text_generation.py | 4 ++-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/logits.py b/modules/logits.py index 4233c8a5..73cabb41 100644 --- a/modules/logits.py +++ b/modules/logits.py @@ -13,8 +13,8 @@ global_scores = None def get_next_logits(*args, **kwargs): - if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']: - shared.model, shared.tokenizer = load_model(shared.previous_model_name) + if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']: + shared.model, shared.tokenizer = load_model(shared.model_name) needs_lock = not args[2] # use_samplers if needs_lock: diff --git a/modules/models.py b/modules/models.py index ff990ae6..ecef9060 100644 --- a/modules/models.py +++ b/modules/models.py @@ -370,7 +370,6 @@ def clear_torch_cache(): def unload_model(keep_model_name=False): shared.model = shared.tokenizer = None - shared.previous_model_name = shared.model_name shared.lora_names = [] shared.model_dirty_from_training = False clear_torch_cache() diff --git a/modules/shared.py b/modules/shared.py index 2aebb7c6..c27657ff 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -13,7 +13,6 @@ from modules.logging_colors import logger model = None tokenizer = None model_name = 'None' -previous_model_name = 'None' is_seq2seq = False model_dirty_from_training = False lora_names = [] diff --git a/modules/text_generation.py b/modules/text_generation.py index d971a30e..75e5ef36 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -32,8 +32,8 @@ from modules.models import clear_torch_cache, load_model def generate_reply(*args, **kwargs): - if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']: - shared.model, shared.tokenizer = load_model(shared.previous_model_name) + if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']: + shared.model, shared.tokenizer = load_model(shared.model_name) shared.generation_lock.acquire() try: From f4d95f33b80ba1fa6f69590ffe9ddff014dbca1a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 28 Jul 2024 22:21:56 -0700 Subject: [PATCH 71/91] downloader: better progress bar --- download-model.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/download-model.py b/download-model.py index 
08b23d08..0014b689 100644 --- a/download-model.py +++ b/download-model.py @@ -212,11 +212,15 @@ class ModelDownloader: total_size = int(r.headers.get('content-length', 0)) block_size = 1024 * 1024 # 1MB + filename_str = str(filename) # Convert PosixPath to string if necessary + tqdm_kwargs = { 'total': total_size, - 'unit': 'iB', + 'unit': 'B', 'unit_scale': True, - 'bar_format': '{l_bar}{bar}| {n_fmt}/{total_fmt} {rate_fmt}' + 'unit_divisor': 1024, + 'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]', + 'desc': f"{filename_str}: " } if 'COLAB_GPU' in os.environ: @@ -233,7 +237,7 @@ class ModelDownloader: t.update(len(data)) if total_size != 0 and self.progress_bar is not None: count += len(data) - self.progress_bar(float(count) / float(total_size), f"{filename}") + self.progress_bar(float(count) / float(total_size), f"{filename_str}") break # Exit loop if successful except (RequestException, ConnectionError, Timeout) as e: From 30b4d8c8b2103f89bf60a777ba5c750f5ebaf95a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:51:40 -0700 Subject: [PATCH 72/91] Fix Llama 3.1 template including lengthy "tools" headers --- modules/chat.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index 18fee9fa..00c4ffa9 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -92,8 +92,16 @@ def generate_chat_prompt(user_input, state, **kwargs): chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2']) instruction_template = jinja_env.from_string(state['instruction_template_str']) - instruct_renderer = partial(instruction_template.render, add_generation_prompt=False) chat_template = jinja_env.from_string(chat_template_str) + + instruct_renderer = partial( + instruction_template.render, + builtin_tools=None, + tools=None, + tools_in_user_message=False, + add_generation_prompt=False + ) + chat_renderer = partial( chat_template.render, add_generation_prompt=False, From 608545d28221882f4a2b4556658cf0f9005312bb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 31 Jul 2024 18:44:46 -0700 Subject: [PATCH 73/91] Bump llama-cpp-python to 0.2.85 --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2dc65a6e..b1c68917 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-win_amd64.whl; 
platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index f72e8dfc..9cef52be 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.84+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.84+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.85+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.85+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index a1cd56db..e6df644c 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version 
== "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 5bcb22a3..35131b95 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 3865fc84..ee9876ee 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.84-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 71380007..87b1a95c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index dc9a5cb0..91c30354 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt 
b/requirements_noavx2.txt index 1c719fff..1adcec6f 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.84+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.84+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == 
"x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.84+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From f106e780ba91ad8c2949596f506a7f762e3724c5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:41:12 -0700 Subject: [PATCH 74/91] downloader: use 1 session for all files for better speed --- download-model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/download-model.py b/download-model.py index 0014b689..306784a3 100644 --- a/download-model.py +++ b/download-model.py @@ -29,6 +29,7 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co" class ModelDownloader: def __init__(self, max_retries=5): self.max_retries = max_retries + self.session = self.get_session() def get_session(self): session = requests.Session() @@ -72,7 +73,7 @@ class ModelDownloader: return model, branch def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None): - session = self.get_session() + session = self.session page = f"/api/models/{model}/tree/{branch}" cursor = b"" @@ -192,7 +193,7 @@ class ModelDownloader: attempt = 0 while attempt < 
max_retries: attempt += 1 - session = self.get_session() + session = self.session headers = {} mode = 'wb' From e926c03b3d028ebad854e753517894fa6c637c3e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:41:18 -0700 Subject: [PATCH 75/91] Add a --tokenizer-dir command-line flag for llamacpp_HF --- modules/models.py | 33 ++++++++++++++++++++++----------- modules/shared.py | 1 + 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/modules/models.py b/modules/models.py index ecef9060..b0e2346e 100644 --- a/modules/models.py +++ b/modules/models.py @@ -98,7 +98,7 @@ def load_model(model_name, loader=None): if model is None: return None, None else: - tokenizer = load_tokenizer(model_name, model) + tokenizer = load_tokenizer(model_name) shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings}) if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt'): @@ -113,9 +113,13 @@ def load_model(model_name, loader=None): return model, tokenizer -def load_tokenizer(model_name, model): +def load_tokenizer(model_name, tokenizer_dir=None): + if tokenizer_dir: + path_to_model = Path(tokenizer_dir) + else: + path_to_model = Path(f"{shared.args.model_dir}/{model_name}/") + tokenizer = None - path_to_model = Path(f"{shared.args.model_dir}/{model_name}/") if path_to_model.exists(): if shared.args.no_use_fast: logger.info('Loading the tokenizer with use_fast=False.') @@ -278,17 +282,24 @@ def llamacpp_loader(model_name): def llamacpp_HF_loader(model_name): from modules.llamacpp_hf import LlamacppHF - path = Path(f'{shared.args.model_dir}/{model_name}') - - # Check if a HF tokenizer is available for the model - if all((path / file).exists() for file in ['tokenizer_config.json']): - logger.info(f'Using tokenizer from: \"{path}\"') + if shared.args.tokenizer_dir: + logger.info(f'Using tokenizer from: \"{shared.args.tokenizer_dir}\"') else: - logger.error("Could not load the model because a tokenizer in Transformers format was not found.") - return None, None + path = Path(f'{shared.args.model_dir}/{model_name}') + # Check if a HF tokenizer is available for the model + if all((path / file).exists() for file in ['tokenizer_config.json']): + logger.info(f'Using tokenizer from: \"{path}\"') + else: + logger.error("Could not load the model because a tokenizer in Transformers format was not found.") + return None, None model = LlamacppHF.from_pretrained(model_name) - return model + + if shared.args.tokenizer_dir: + tokenizer = load_tokenizer(model_name, tokenizer_dir=shared.args.tokenizer_dir) + return model, tokenizer + else: + return model def AutoGPTQ_loader(model_name): diff --git a/modules/shared.py b/modules/shared.py index c27657ff..43533a14 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -132,6 +132,7 @@ group.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (l group.add_argument('--row_split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.') group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.') group.add_argument('--attention-sink-size', type=int, default=5, help='StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.') +group.add_argument('--tokenizer-dir', type=str, help='Load the tokenizer from this folder. 
Meant to be used with llamacpp_HF through the command-line.') # ExLlamaV2 group = parser.add_argument_group('ExLlamaV2') From 81773f7f3681c3f67e2b79aa2c0f91cb15cdac25 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Aug 2024 20:07:05 -0700 Subject: [PATCH 76/91] Bump transformers to 4.44 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index b1c68917..3f702ecd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 9cef52be..c7611af8 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index e6df644c..c0dda7e8 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 35131b95..3153376d 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index ee9876ee..22b676c4 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 87b1a95c..95169f33 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 91c30354..e854cca9 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1adcec6f..dde91987 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -24,7 +24,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index bc8a59aa..a95098a5 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -21,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.43.* +transformers==4.44.* tqdm wandb From 089d5a94152ea7802a480bf5c227579d1be8c899 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 7 Aug 2024 20:36:28 -0700 Subject: [PATCH 77/91] Bump llama-cpp-python to 0.2.87 --- requirements.txt | 24 
++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3f702ecd..2cdb94fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index c7611af8..042e8a2e 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.85+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.85+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.87+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.87+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index c0dda7e8..0448e54a 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine 
== "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 3153376d..1dff0bb7 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and 
platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 22b676c4..694f8a72 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.85-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release 
>= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 95169f33..00071cd1 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e854cca9..ecb82e90 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == 
"3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index dde91987..b1ba1eca 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.85+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.85+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.85+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From bf8187124dddf99443777aeba86cf682c74f6a07 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 13 Aug 
2024 12:40:18 -0700 Subject: [PATCH 78/91] Bump llama-cpp-python to 0.2.88 --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2cdb94fb..45bdf25e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp311-cp311-win_amd64.whl; 
platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 042e8a2e..2aa39dc6 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.87+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.87+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.88+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.88+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 0448e54a..bd915071 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 1dff0bb7..066a5aec 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version 
== "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 694f8a72..f025e8b7 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.87-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 00071cd1..df5a7d60 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index ecb82e90..fd9548ce 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index b1ba1eca..539ace51 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.87+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.87+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.87+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels 
https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 4d8c1801c2313af70944daea503672ce6d3261dd Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 19 Aug 2024 17:45:01 -0700 Subject: [PATCH 79/91] Bump llama-cpp-python to 0.2.89 --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 8 ++++---- requirements_apple_silicon.txt | 12 ++++++------ requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.txt b/requirements.txt index 45bdf25e..a4b864b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 2aa39dc6..db765afe 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,14 +32,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python 
(CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.88+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.88+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index bd915071..3751af0e 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # 
llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 066a5aec..c38fad1c 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and 
python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index f025e8b7..0f2f9656 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,10 +32,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.88-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version 
== "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index df5a7d60..9a46e366 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index fd9548ce..e78ce3e0 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 539ace51..5cb0d19a 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -35,22 +35,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.88+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" 
and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.88+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.88+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From bb987ffe6611a13396848b927b34bbd9d97331c7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 19 Aug 2024 23:06:52 -0300 Subject: [PATCH 80/91] Update README.md --- README.md | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 40ae94d5..017fafcb 100644 --- a/README.md +++ b/README.md @@ -10,27 +10,30 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -* 3 interface modes: default (two columns), notebook, and chat. -* Multiple model backends: [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). -* Dropdown menu for quickly switching between different models. -* Large number of extensions (built-in and user-contributed), including Coqui TTS for realistic voice outputs, Whisper STT for voice inputs, translation, [multimodal pipelines](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal), vector databases, Stable Diffusion integration, and a lot more. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. -* [Chat with custom characters](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab#character). -* Precise chat templates for instruction-following models, including Llama-2-chat, Alpaca, Vicuna, Mistral. -* LoRA: train new LoRAs with your own data, load/unload LoRAs on the fly for generation. -* Transformers library integration: load models in 4-bit or 8-bit precision through bitsandbytes, use llama.cpp with transformers samplers (`llamacpp_HF` loader), CPU inference in 32-bit precision using PyTorch. +* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). * OpenAI-compatible API server with Chat and Completions endpoints -- see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). +* Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual intervention. 
+* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both task-based interactions and casual conversations with characters. +* High-quality outputs in chat scenarios with the `chat-instruct` mode, which uses the model's template automatically. +* Easy switching between conversations and starting new ones through the "Past chats" menu in the main interface tab. +* Flexible text generation through autocompletion in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. +* Multiple sampling parameters and options for sophisticated text generation control. +* Quick downloading and loading of new models through the interface without restarting, using the "Model" tab. +* Simple LoRA fine-tuning tool to customize models with your data. +* Self-contained dependencies in the `installer_files` folder, avoiding interference with the system's Python environment. Precompiled Python wheels for the backends are in the `requirements.txt` and are transparently compiled using GitHub Actions. +* Extensions support, including numerous built-in and user-contributed extensions. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. ## How to install 1) Clone or [download](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) the repository. 2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS. 3) Select your GPU vendor when asked. -4) Once the installation ends, browse to `http://localhost:7860/?__theme=dark`. +4) Once the installation ends, browse to `http://localhost:7860`. 5) Have fun! -To restart the web UI in the future, just run the `start_` script again. This script creates an `installer_files` folder where it sets up the project's requirements. In case you need to reinstall the requirements, you can simply delete that folder and start the web UI again. +To restart the web UI in the future, just run the `start_` script again. This script creates an `installer_files` folder where it sets up the project's requirements. If you need to reinstall the requirements, you can simply delete that folder and start the web UI again. -The script accepts command-line flags. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there. +The script accepts command-line flags. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there, such as `--api` in case you need to use the API. To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`. 
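
As a usage illustration for the `--api` flag mentioned in the README hunk above: once the server is started with that flag, OpenAI-compatible endpoints become available locally. The sketch below is not part of the patch; it assumes the default API port of 5000 and the standard `/v1/chat/completions` path, so adjust both if your configuration differs.

```python
import requests

# Minimal sketch: query the local OpenAI-compatible API after starting
# the web UI with --api. Port 5000 and the /v1/chat/completions path
# are assumed defaults here, not guaranteed by this patch series.
url = "http://127.0.0.1:5000/v1/chat/completions"
payload = {
    "messages": [{"role": "user", "content": "Hello! Which model are you running?"}],
    "max_tokens": 200,
}

resp = requests.post(url, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

An OpenAI client library pointed at `http://127.0.0.1:5000/v1` should behave equivalently, since the endpoints follow the Chat and Completions formats.
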
From 8bac1a93822885d1e53ce0dca5803f1ac19e3a55 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 19 Aug 2024 23:10:04 -0300 Subject: [PATCH 81/91] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 017fafcb..fe6a33ca 100644 --- a/README.md +++ b/README.md @@ -404,7 +404,7 @@ https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/ma ## Community -* Subreddit: https://www.reddit.com/r/oobabooga/ +* Subreddit: https://www.reddit.com/r/Oobabooga/ * Discord: https://discord.gg/jwZCF2dPQN ## Acknowledgment From 68f928b5e0f89b90b81f5b029f4e40c077df5e90 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 23:33:56 -0300 Subject: [PATCH 82/91] Update peft requirement from ==0.8.* to ==0.12.* (#6292) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index a4b864b1..4cda446a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_amd.txt b/requirements_amd.txt index db765afe..a4fcf73c 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 3751af0e..9ae58d17 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index c38fad1c..3b868edd 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 0f2f9656..ba9c495a 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 9a46e366..80895f17 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e78ce3e0..32a9ed20 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 5cb0d19a..4e33bb0b 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -14,7 +14,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 
psutil pyyaml diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index a95098a5..5ec157c5 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -11,7 +11,7 @@ numba==0.59.* numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.12.* Pillow>=9.5.0 psutil pyyaml From 64e16e9a46d677e0b25632211efca64062980588 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 23:34:10 -0300 Subject: [PATCH 83/91] Update accelerate requirement from ==0.32.* to ==0.33.* (#6291) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4cda446a..08b7d56d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" auto-gptq==0.7.1 bitsandbytes==0.43.* diff --git a/requirements_amd.txt b/requirements_amd.txt index a4fcf73c..52e36510 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 9ae58d17..18a81d04 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 3b868edd..af02904b 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index ba9c495a..8cdd8519 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 80895f17..807c182a 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 32a9ed20..e2a89936 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 4e33bb0b..d22eb72c 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* aqlm[gpu,cpu]==1.1.6; platform_system == "Linux" auto-gptq==0.7.1 bitsandbytes==0.43.* diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 5ec157c5..ffb45fe3 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,4 +1,4 @@ -accelerate==0.32.* +accelerate==0.33.* colorama datasets einops From fd9cb266196f9e90a850f37f5f33b69057329721 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 19 Aug 2024 19:38:43 -0700 Subject: [PATCH 84/91] UI: update the DRY parameters descriptions/order --- modules/ui_parameters.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 234e1af2..eff62c20 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -40,9 +40,9 @@ def create_ui(default_preset): shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') with gr.Blocks(): - shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to value > 0 to enable DRY. Controls the magnitude of the penalty for the shortest penalized sequences.') - shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.') + shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.') shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.') + shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.') shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.') gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)") From 1b1518aa6a702b3a494f712c3422fc5efcc0daf5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 Aug 2024 00:36:18 -0300 Subject: [PATCH 85/91] Update README.md --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fe6a33ca..230dfccf 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,9 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features * Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). -* OpenAI-compatible API server with Chat and Completions endpoints -- see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). +* OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). * Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual intervention. -* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both task-based interactions and casual conversations with characters. -* High-quality outputs in chat scenarios with the `chat-instruct` mode, which uses the model's template automatically. 
+* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both task-based interactions and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat's prompt, leading to higher quality outputs. * Easy switching between conversations and starting new ones through the "Past chats" menu in the main interface tab. * Flexible text generation through autocompletion in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. * Multiple sampling parameters and options for sophisticated text generation control. From 406995f7224073a7bfbee6a2ce4e67f723a21e36 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 19 Aug 2024 21:24:01 -0700 Subject: [PATCH 86/91] Update README --- README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 230dfccf..0e264c75 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). +* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). The Transformers loader also supports models quantized through [AutoAWQ](https://github.com/casper-hansen/AutoAWQ) and [HQQ](https://github.com/mobiusml/hqq). * OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). * Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual intervention. * Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both task-based interactions and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat's prompt, leading to higher quality outputs. 
@@ -209,13 +209,13 @@ usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [-- [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] - [--attention-sink-size ATTENTION_SINK_SIZE] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] - [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] - [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] - [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] - [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] - [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] - [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] + [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] + [--no_xformers] [--no_sdpa] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] + [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] + [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] + [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] + [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] + [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention] Text generation web UI @@ -239,7 +239,7 @@ Basic settings: Model loader: --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, - AutoGPTQ, AutoAWQ. + AutoGPTQ. Transformers/Accelerate: --cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow. @@ -283,6 +283,7 @@ llama.cpp: --row_split Split the model by rows across GPUs. This may improve multi-gpu performance. 
--streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed. --attention-sink-size ATTENTION_SINK_SIZE StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt. + --tokenizer-dir TOKENIZER_DIR Load the tokenizer from this folder. Meant to be used with llamacpp_HF through the command-line. ExLlamaV2: --gpu-split GPU_SPLIT Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7. @@ -306,9 +307,6 @@ AutoGPTQ: --wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. --groupsize GROUPSIZE Group size. -AutoAWQ: - --no_inject_fused_attention Disable the use of fused attention, which will use less VRAM at the cost of slower inference. - HQQ: --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. From 9d99156ca395455f2c871a77c352bb9fa93e02ca Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 Aug 2024 01:27:02 -0300 Subject: [PATCH 87/91] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0e264c75..6f2d7396 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,9 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). The Transformers loader also supports models quantized through [AutoAWQ](https://github.com/casper-hansen/AutoAWQ) and [HQQ](https://github.com/mobiusml/hqq). +* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported through the Transformers loader. * OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). -* Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual intervention. +* Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual setup. * Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both task-based interactions and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat's prompt, leading to higher quality outputs. * Easy switching between conversations and starting new ones through the "Past chats" menu in the main interface tab. 
* Flexible text generation through autocompletion in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. From d9a031fcadbbb241be0d26a4041903a8cc659023 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 Aug 2024 01:52:30 -0300 Subject: [PATCH 88/91] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6f2d7396..9d73f910 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,13 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported through the Transformers loader. * OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). * Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual setup. -* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both task-based interactions and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat's prompt, leading to higher quality outputs. -* Easy switching between conversations and starting new ones through the "Past chats" menu in the main interface tab. -* Flexible text generation through autocompletion in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. -* Multiple sampling parameters and options for sophisticated text generation control. -* Quick downloading and loading of new models through the interface without restarting, using the "Model" tab. +* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both instruction-following and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat prompt, leading to higher quality outputs. +* "Past chats" menu to quickly switch between conversations and start new ones in the Chat tab. +* Free-form generation in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. +* Multiple sampling parameters and generation options for sophisticated text generation control. +* Easy switching between different models through the UI without restarting, using the "Model" tab. * Simple LoRA fine-tuning tool to customize models with your data. -* Self-contained dependencies in the `installer_files` folder, avoiding interference with the system's Python environment. Precompiled Python wheels for the backends are in the `requirements.txt` and are transparently compiled using GitHub Actions. +* All in one folder. 
The requirements are installed in a self-contained `installer_files` folder that doesn't interfere with the system's environment. * Extensions support, including numerous built-in and user-contributed extensions. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. ## How to install From 1124f71cf3c4e5edb9c9a5e8e8c49314e4a226c7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:19:46 -0300 Subject: [PATCH 89/91] Update README.md --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9d73f910..22ceeeb8 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported through the Transformers loader. * OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). -* Automatic prompt formatting for each model using the Jinja2 template in its metadata, ensuring high-quality outputs without manual setup. -* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both instruction-following and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat prompt, leading to higher quality outputs. -* "Past chats" menu to quickly switch between conversations and start new ones in the Chat tab. +* Automatic prompt formatting for each model using the Jinja2 template in its metadata. +* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both instruction-following and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat prompt, ensuring high-quality outputs without manual setup. +* "Past chats" menu to quickly switch between conversations and start new ones. * Free-form generation in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs. * Multiple sampling parameters and generation options for sophisticated text generation control. * Easy switching between different models through the UI without restarting, using the "Model" tab. @@ -30,9 +30,11 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. 4) Once the installation ends, browse to `http://localhost:7860`. 5) Have fun! -To restart the web UI in the future, just run the `start_` script again. This script creates an `installer_files` folder where it sets up the project's requirements. If you need to reinstall the requirements, you can simply delete that folder and start the web UI again. +To restart the web UI in the future, run the `start_` script again. 
-The script accepts command-line flags. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there, such as `--api` in case you need to use the API. +This script creates an `installer_files` folder where it sets up the project's requirements. If you need to reinstall the requirements, just delete that folder and start the web UI again. + +The script accepts command-line flags, such as `./start_linux.sh --help`. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there, such as `--api` in case you need to use the API. To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`. From c24966c5914e974b509e611f01527e1a2854b4c3 Mon Sep 17 00:00:00 2001 From: joachimchauvet <13382894+joachimchauvet@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:33:45 +0300 Subject: [PATCH 90/91] update API documentation with examples to list/load models (#5902) --- docs/12 - OpenAI API.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index b00a1f34..9b4f89bf 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -19,7 +19,7 @@ Add `--api` to your command-line flags. ### Examples -For the documentation with all the parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file. +For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file. The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters). 
@@ -114,6 +114,30 @@ curl -k http://127.0.0.1:5000/v1/internal/logits \ }' ``` +#### List models + +```shell +curl -k http://127.0.0.1:5000/v1/internal/model/list \ + -H "Content-Type: application/json" +``` + +#### Load model + +```shell +curl -k http://127.0.0.1:5000/v1/internal/model/load \ + -H "Content-Type: application/json" \ + -d '{ + "model_name": "model_name", + "args": { + "load_in_4bit": true, + "n_gpu_layers": 12 + }, + "settings": { + "instruction_template": "Alpaca" + } + }' +``` + #### Python chat example ```python From 1f288b40720bbef6ed32ad5dee1899b9e5949925 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:40:15 -0700 Subject: [PATCH 91/91] Bump ExLlamaV2 to 0.1.9 --- requirements.txt | 10 +++++----- requirements_amd.txt | 6 +++--- requirements_amd_noavx2.txt | 6 +++--- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_noavx2.txt | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/requirements.txt b/requirements.txt index 08b7d56d..a39a7c70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" 
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 52e36510..5ff86863 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -40,9 +40,9 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 18a81d04..c0d094e8 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -38,9 +38,9 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp 
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index af02904b..cbcf1175 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 8cdd8519..d5cb3675 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -38,4 +38,4 @@ 
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/me https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index d22eb72c..bfe57329 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -53,11 +53,11 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/te https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" 
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
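[Editor's illustrative aside, not part of the patch: each wheel URL in the requirements files above carries an environment marker after the semicolon. pip evaluates the marker against the current interpreter and skips the requirement when it is false, which is how a single requirements file can list prebuilt wheels for several OS, CPU, and Python combinations. A small sketch using the `packaging` library (the same marker logic pip vendors); the example environments are assumptions for illustration.]

```python
# Sketch: evaluate a PEP 508 environment marker like the ones used above.
from packaging.markers import Marker

marker = Marker(
    'platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"'
)

# Evaluate against the running interpreter...
print(marker.evaluate())

# ...or against an explicit environment (e.g. a Windows / Python 3.10 machine).
print(marker.evaluate({
    "platform_system": "Windows",
    "platform_machine": "AMD64",
    "python_version": "3.10",
}))
```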