From 2a92a842cec3f95739dd1034067a1d8568e44c7a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:32:20 -0300 Subject: [PATCH 01/12] Bump gradio to 4.23 (#5758) --- css/main.css | 5 +++ extensions/gallery/script.py | 2 +- extensions/whisper_stt/script.py | 2 +- js/switch_tabs.js | 8 ++-- modules/block_requests.py | 3 +- modules/gradio_hijack.py | 72 ++++++++++++++++++++++++++++++++ modules/shared.py | 1 - modules/text_generation.py | 14 ++----- modules/ui.py | 1 - modules/ui_chat.py | 32 +++++++------- modules/ui_default.py | 6 +-- modules/ui_model_menu.py | 2 +- modules/ui_notebook.py | 6 +-- modules/ui_parameters.py | 1 - modules/ui_session.py | 4 +- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- server.py | 15 +++---- settings-template.yaml | 1 - 26 files changed, 130 insertions(+), 63 deletions(-) create mode 100644 modules/gradio_hijack.py diff --git a/css/main.css b/css/main.css index b41985d8..9681a5e3 100644 --- a/css/main.css +++ b/css/main.css @@ -89,6 +89,11 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { flex-wrap: nowrap; } +gradio-app > :first-child { + padding-left: var(--size-4) !important; + padding-right: var(--size-4) !important; +} + .header_bar { background-color: #f7f7f7; box-shadow: 0 2px 3px rgba(22 22 22 / 35%); diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 1cb7f27f..1bb8068a 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -119,7 +119,7 @@ def ui(): samples_per_page=settings["gallery-items_per_page"] ) - filter_box.change(lambda: None, None, None, _js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( + filter_box.change(lambda: None, None, None, js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( filter_cards, filter_box, gallery).then( lambda x: gr.update(elem_classes='highlighted-border' if x != '' else ''), filter_box, filter_box, show_progress=False) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index cdc55687..efa58ce9 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -64,7 +64,7 @@ def ui(): audio.change( auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then( - None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}") + None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}") whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None) whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None) diff --git a/js/switch_tabs.js b/js/switch_tabs.js index 75d56367..0564f891 100644 --- a/js/switch_tabs.js +++ b/js/switch_tabs.js @@ -32,27 +32,27 @@ function switch_to_chat() { } function switch_to_default() { - let default_tab_button = main_parent.childNodes[0].childNodes[4]; + let default_tab_button = main_parent.childNodes[0].childNodes[5]; default_tab_button.click(); scrollToTop(); } function switch_to_notebook() { - let notebook_tab_button = main_parent.childNodes[0].childNodes[7]; + let notebook_tab_button = main_parent.childNodes[0].childNodes[9]; notebook_tab_button.click(); 
findButtonsByText("Raw")[1].click(); scrollToTop(); } function switch_to_generation_parameters() { - let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; + let parameters_tab_button = main_parent.childNodes[0].childNodes[13]; parameters_tab_button.click(); findButtonsByText("Generation")[0].click(); scrollToTop(); } function switch_to_character() { - let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; + let parameters_tab_button = main_parent.childNodes[0].childNodes[13]; parameters_tab_button.click(); findButtonsByText("Character")[0].click(); scrollToTop(); diff --git a/modules/block_requests.py b/modules/block_requests.py index fbc45de4..ac6c6800 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -43,8 +43,9 @@ def my_open(*args, **kwargs): with original_open(*args, **kwargs) as f: file_contents = f.read() - file_contents = file_contents.replace(b'\t\t', b'') + file_contents = file_contents.replace(b'\t\t', b'') file_contents = file_contents.replace(b'cdnjs.cloudflare.com', b'127.0.0.1') + return io.BytesIO(file_contents) else: return original_open(*args, **kwargs) diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py new file mode 100644 index 00000000..2ddd983a --- /dev/null +++ b/modules/gradio_hijack.py @@ -0,0 +1,72 @@ +''' +Copied from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184 +''' + +import inspect +import warnings +from functools import wraps + +import gradio as gr + + +class GradioDeprecationWarning(DeprecationWarning): + pass + + +def repair(grclass): + if not getattr(grclass, 'EVENTS', None): + return + + @wraps(grclass.__init__) + def __repaired_init__(self, *args, tooltip=None, source=None, original=grclass.__init__, **kwargs): + if source: + kwargs["sources"] = [source] + + allowed_kwargs = inspect.signature(original).parameters + fixed_kwargs = {} + for k, v in kwargs.items(): + if k in allowed_kwargs: + fixed_kwargs[k] = v + else: + warnings.warn(f"unexpected argument for {grclass.__name__}: {k}", GradioDeprecationWarning, stacklevel=2) + + original(self, *args, **fixed_kwargs) + + self.webui_tooltip = tooltip + + for event in self.EVENTS: + replaced_event = getattr(self, str(event)) + + def fun(*xargs, _js=None, replaced_event=replaced_event, **xkwargs): + if _js: + xkwargs['js'] = _js + + return replaced_event(*xargs, **xkwargs) + + setattr(self, str(event), fun) + + grclass.__init__ = __repaired_init__ + grclass.update = gr.update + + +for component in set(gr.components.__all__ + gr.layouts.__all__): + repair(getattr(gr, component, None)) + + +class Dependency(gr.events.Dependency): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def then(*xargs, _js=None, **xkwargs): + if _js: + xkwargs['js'] = _js + + return original_then(*xargs, **xkwargs) + + original_then = self.then + self.then = then + + +gr.events.Dependency = Dependency + +gr.Box = gr.Group diff --git a/modules/shared.py b/modules/shared.py index c2a44eb8..ecfdb3be 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,7 +46,6 @@ settings = { 'truncation_length_min': 0, 'truncation_length_max': 200000, 'max_tokens_second': 0, - 'max_updates_second': 0, 'prompt_lookup_num_tokens': 0, 'custom_stopping_strings': '', 'custom_token_bans': '', diff --git a/modules/text_generation.py b/modules/text_generation.py index d1a59a9d..d4b0bb00 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -81,19 +81,16 @@ def _generate_reply(question, state, 
stopping_strings=None, is_chat=False, escap state = copy.deepcopy(state) state['stream'] = True - min_update_interval = 0 - if state.get('max_updates_second', 0) > 0: - min_update_interval = 1 / state['max_updates_second'] - # Generate for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) + if is_stream: cur_time = time.time() - # Maximum number of tokens/second + # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: @@ -101,13 +98,8 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap last_update = time.time() yield reply - - # Limit updates to avoid lag in the Gradio UI - # API updates are not limited else: - if cur_time - last_update > min_update_interval: - last_update = cur_time - yield reply + yield reply if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index f973fa6f..a21752e3 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -117,7 +117,6 @@ def list_interface_input_elements(): 'max_new_tokens', 'auto_max_new_tokens', 'max_tokens_second', - 'max_updates_second', 'prompt_lookup_num_tokens', 'seed', 'temperature', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 293d253e..8ce7b610 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -181,7 +181,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -189,28 +189,28 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -288,7 +288,7 @@ def create_event_handlers(): chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['character_menu'].change( chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success( @@ -296,7 +296,7 @@ def create_event_handlers(): chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( - lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') + lambda: None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') shared.gradio['mode'].change( lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then( @@ -332,15 +332,15 @@ def create_event_handlers(): shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') + None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') 
shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) @@ -354,28 +354,28 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send_instruction_to_notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['send_instruction_to_negative_prompt'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') shared.gradio['send-chat-to-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send-chat-to-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') - shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_default.py b/modules/ui_default.py index 7db6f0d9..1f962551 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -67,21 +67,21 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => 
{{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index a31bbcf5..9f2729e2 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -75,7 +75,7 @@ def create_ui(): with gr.Row(): with gr.Column(): shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) - with gr.Box(): + with gr.Blocks(): with gr.Row(): with gr.Column(): with gr.Blocks(): diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 6bd5c919..a7c62baf 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -67,14 +67,14 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) @@ -83,7 +83,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index f809b535..51db8929 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -73,7 +73,6 @@ def create_ui(default_preset): with gr.Column(): shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') - shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') diff --git a/modules/ui_session.py b/modules/ui_session.py index 989046ea..08929c33 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -32,10 +32,10 @@ def create_ui(): # Reset interface event shared.gradio['reset_interface'].click( set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( - lambda: None, None, None, _js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;background:var(--body-background-fill)">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') + lambda: None, None, None, js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;background:var(--body-background-fill)">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click( - lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( + lambda: None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')) shared.gradio['save_settings'].click( diff --git a/requirements.txt b/requirements.txt index 8ab4e61b..cc12866e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ bitsandbytes==0.43.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd.txt b/requirements_amd.txt index f3045d56..19689c97 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 7004e388..07aa8526 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 2a398f91..729bb1d6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index fc10bee8..3e96ed99 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 05cad5cf..e0330536 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 4eeef9e9..41157d1b 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 295680fc..b6f0f7e5 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -4,7 +4,7 @@ bitsandbytes==0.43.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index abb9d45d..72507d1a 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.23.* hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/server.py b/server.py index 681fe4e7..c6a01830 100644 --- a/server.py +++ b/server.py @@ -18,6 +18,7 @@ warnings.filterwarnings('ignore', category=UserWarning, message='The value passe warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_names" has conflict') with RequestBlocker(): + from modules import gradio_hijack import gradio as gr import matplotlib @@ -145,11 +146,9 @@ def 
create_interface(): ui_model_menu.create_event_handlers() # Interface launch events - if shared.settings['dark_theme']: - shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") - - shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") - shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['interface'].load(lambda: None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}") + shared.gradio['interface'].load(lambda: None, None, None, js=f"() => {{{js}}}") + shared.gradio['interface'].load(lambda x: None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) @@ -157,9 +156,10 @@ def create_interface(): extensions_module.create_extensions_block() # Extensions block # Launch the interface - shared.gradio['interface'].queue(concurrency_count=64) + shared.gradio['interface'].queue() with OpenMonkeyPatch(): shared.gradio['interface'].launch( + max_threads=64, prevent_thread_lock=True, share=shared.args.share, server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), @@ -168,7 +168,8 @@ def create_interface(): auth=auth or None, ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, - ssl_certfile=shared.args.ssl_certfile + ssl_certfile=shared.args.ssl_certfile, + allowed_paths=["."] ) diff --git a/settings-template.yaml b/settings-template.yaml index bf057be7..c239057f 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -15,7 +15,6 @@ truncation_length: 2048 truncation_length_min: 0 truncation_length_max: 200000 max_tokens_second: 0 -max_updates_second: 0 prompt_lookup_num_tokens: 0 custom_stopping_strings: '' custom_token_bans: '' From 8c9aca239a3da0279431f19caeecc67c60b72404 Mon Sep 17 00:00:00 2001 From: Yiximail Date: Wed, 27 Mar 2024 03:33:09 +0800 Subject: [PATCH 02/12] Fix prompt incorrectly set to empty when suffix is empty string (#5757) --- modules/chat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index c8516c59..5fb5523c 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -161,12 +161,14 @@ def generate_chat_prompt(user_input, state, **kwargs): prompt = instruction_template.render(messages=outer_messages) suffix = get_generation_prompt(instruct_renderer, impersonate=False)[1] - prompt = prompt[:-len(suffix)] + if len(suffix) > 0: + prompt = prompt[:-len(suffix)] else: if _continue: suffix = get_generation_prompt(renderer, impersonate=impersonate)[1] - prompt = prompt[:-len(suffix)] + if len(suffix) > 0: + prompt = prompt[:-len(suffix)] else: prefix = get_generation_prompt(renderer, impersonate=impersonate)[0] if state['mode'] == 'chat' and not impersonate: From bdcf31035f06d666f01339c9af2447d6c2b83bcc Mon Sep 17 00:00:00 2001 From: Yiximail Date: Wed, 27 Mar 2024 03:34:03 +0800 Subject: [PATCH 03/12] Set a default empty string for user_bio to fix #5717 issue (#5722) --- extensions/openai/typing.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index af7b094f..5e26ce0c 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -107,7 +107,7 @@ class ChatCompletionRequestParams(BaseModel): context: str | None = Field(default=None, description="Overwrites the value set by character field.") greeting: str | None = Field(default=None, description="Overwrites the value set by character field.") user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1") - user_bio: str | None = Field(default=None, description="The user description/personality.") + user_bio: str | None = Field(default='', description="The user description/personality.") chat_template_str: str | None = Field(default=None, description="Jinja2 template for chat.") chat_instruct_command: str | None = None From 7cbafc0540a37409ad294d5535685e1c64a92e2b Mon Sep 17 00:00:00 2001 From: wldhx Date: Tue, 26 Mar 2024 23:34:53 +0400 Subject: [PATCH 04/12] docker: Remove obsolete CLI_ARGS variable (#5726) --- docker/.env.example | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 6e2ad6ac..2de9f0ab 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -2,8 +2,6 @@ # however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5 # https://developer.nvidia.com/cuda-gpus you can find the version for your card here TORCH_CUDA_ARCH_LIST=7.5 -# your command-line flags go here: -CLI_ARGS=--listen # the port the webui binds to on the host HOST_PORT=7860 # the port the webui binds to inside the container From 9ad116a6e2dcd1fb0690794559012f606734f944 Mon Sep 17 00:00:00 2001 From: Bartowski Date: Tue, 26 Mar 2024 15:35:29 -0400 Subject: [PATCH 05/12] Add config for hyperion and hercules models to use chatml (#5742) --- models/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/config.yaml b/models/config.yaml index 6bd4afe3..fc7bc44f 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -190,3 +190,5 @@ instruction_template: 'ChatML' .*synthia: instruction_template: 'Synthia' +.*(hercules|hyperion): + instruction_template: 'ChatML' From 3609ea69e4c4461a4f998bd12cc559d5a016f328 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:36:16 -0300 Subject: [PATCH 06/12] Bump aqlm[cpu,gpu] from 1.1.0 to 1.1.2 (#5728) --- requirements.txt | 2 +- requirements_noavx2.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index cc12866e..da47bdfc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ accelerate==0.27.* -aqlm[gpu,cpu]==1.1.0; platform_system == "Linux" +aqlm[gpu,cpu]==1.1.2; platform_system == "Linux" bitsandbytes==0.43.* colorama datasets diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index b6f0f7e5..d8da3136 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,5 +1,5 @@ accelerate==0.27.* -aqlm[gpu,cpu]==1.1.0; platform_system == "Linux" +aqlm[gpu,cpu]==1.1.2; platform_system == "Linux" bitsandbytes==0.43.* colorama datasets From 35da6b989d65b546715a1f41b5223931451a9317 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:45:03 -0300 Subject: [PATCH 07/12] Organize the parameters tab (#5767) --- docs/03 - Parameters Tab.md | 4 -- extensions/openai/typing.py | 4 -- 
modules/loaders.py | 6 --- modules/presets.py | 12 +++--- modules/text_generation.py | 2 +- modules/ui.py | 4 -- modules/ui_parameters.py | 73 +++++++++++++++++-------------------- 7 files changed, 40 insertions(+), 65 deletions(-) diff --git a/docs/03 - Parameters Tab.md b/docs/03 - Parameters Tab.md index ca1c203b..06eebe50 100644 --- a/docs/03 - Parameters Tab.md +++ b/docs/03 - Parameters Tab.md @@ -61,10 +61,6 @@ For more information about the parameters, the [transformers documentation](http * **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp), and others are not deterministic (ExLlamaV2). For these loaders, the seed has no effect. * **encoder_repetition_penalty**: Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge. * **no_repeat_ngram_size**: If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases. -* **min_length**: Minimum generation length in tokens. This is a built-in parameter in the transformers library that has never been very useful. Typically you want to check "Ban the eos_token" instead. -* **num_beams**: Number of beams for beam search. 1 means no beam search. -* **length_penalty**: Used by beam search only. `length_penalty > 0.0` promotes longer sequences, while `length_penalty < 0.0` encourages shorter sequences. -* **early_stopping**: Used by beam search only. When checked, the generation stops as soon as there are "num_beams" complete candidates; otherwise, a heuristic is applied and the generation stops when is it very unlikely to find better candidates (I just copied this from the transformers documentation and have never gotten beam search to generate good results). 
To the right (or below if you are on mobile), the following parameters are present: diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 5e26ce0c..dbfa7167 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -33,10 +33,6 @@ class GenerationOptions(BaseModel): seed: int = -1 encoder_repetition_penalty: float = 1 no_repeat_ngram_size: int = 0 - min_length: int = 0 - num_beams: int = 1 - length_penalty: float = 1 - early_stopping: bool = False truncation_length: int = 0 max_tokens_second: int = 0 prompt_lookup_num_tokens: int = 0 diff --git a/modules/loaders.py b/modules/loaders.py index f1c44a90..60fe8aa6 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -185,13 +185,9 @@ def transformers_samplers(): 'repetition_penalty_range', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'min_length', 'seed', 'do_sample', 'penalty_alpha', - 'num_beams', - 'length_penalty', - 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', @@ -262,7 +258,6 @@ loaders_samplers = { 'repetition_penalty_range', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'min_length', 'seed', 'do_sample', 'mirostat_mode', @@ -321,7 +316,6 @@ loaders_samplers = { 'repetition_penalty_range', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'min_length', 'seed', 'do_sample', 'mirostat_mode', diff --git a/modules/presets.py b/modules/presets.py index 7a041311..a7cda31c 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -40,10 +40,6 @@ def default_preset(): 'do_sample': True, 'encoder_repetition_penalty': 1, 'no_repeat_ngram_size': 0, - 'min_length': 0, - 'num_beams': 1, - 'length_penalty': 1, - 'early_stopping': False, 'sampler_priority': 'temperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat' } @@ -52,7 +48,7 @@ def presets_params(): return [k for k in default_preset()] -def load_preset(name): +def load_preset(name, verbose=False): generate_params = default_preset() if name not in ['None', None, '']: path = Path(f'presets/{name}.yaml') @@ -65,6 +61,10 @@ def load_preset(name): else: logger.error(f"The preset \"{name}\" does not exist under \"{path}\". 
Using the default parameters.") + if verbose: + logger.info(f"\"{name}\" preset:") + pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(generate_params)) + return generate_params @@ -74,7 +74,7 @@ def load_preset_memoized(name): def load_preset_for_ui(name, state): - generate_params = load_preset(name) + generate_params = load_preset(name, verbose=True) state.update(generate_params) return state, *[generate_params[k] for k in presets_params()] diff --git a/modules/text_generation.py b/modules/text_generation.py index d4b0bb00..43488852 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -279,7 +279,7 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']: + for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size']: if k in state: generate_params[k] = state[k] diff --git a/modules/ui.py b/modules/ui.py index a21752e3..56b1518c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -139,12 +139,8 @@ def list_interface_input_elements(): 'repetition_penalty_range', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'min_length', 'do_sample', 'penalty_alpha', - 'num_beams', - 'length_penalty', - 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 51db8929..c020b511 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -30,25 +30,48 @@ def create_ui(default_preset): shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature') shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p') - shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p') shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') + shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, 
label='presence_penalty') shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty') shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') - shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') - shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') - shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') - shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') - shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') + shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)") with gr.Column(): + with gr.Group(): + shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') + shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') + shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') + shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') + shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Specific token IDs to ban from generating, comma-separated. The IDs can be found in the Default or Notebook tab.') + + shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.') shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.') shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', lines=3, elem_classes=['add_scrollbar']) - shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. 
do_sample must be unchecked.') shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') + shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') + shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') + shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') + shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') + + with gr.Column(): + with gr.Row() as shared.gradio['grammar_file_row']: + shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu) + shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) + shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu) + + shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace']) + + with gr.Row(): + with gr.Column(): + shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p') + shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') + shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.') shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.') shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature') @@ -56,46 +79,16 @@ def create_ui(default_preset): shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature']) shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature']) shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".') - shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') - shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') - with gr.Accordion('Other parameters', open=False): - shared.gradio['encoder_repetition_penalty'] = 
gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') - shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') - shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length') - shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams', info='For Beam Search, along with length_penalty and early_stopping.') - shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') - shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.') - gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)") - - with gr.Column(): - with gr.Row(): with gr.Column(): shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') - shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') - - shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') - shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Custom token bans', info='Specific token IDs to ban from generating, comma-separated. 
The IDs can be found in the Default or Notebook tab.') - - with gr.Column(): - shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') - shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') - shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') + shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') + shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming') - with gr.Blocks(): - shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.') - - with gr.Row() as shared.gradio['grammar_file_row']: - shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu) - shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) - shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu) - - shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace']) - ui_chat.create_chat_settings_ui() From 723f912c167652243e364a1e18ca5c459c21d7d9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:57:35 -0700 Subject: [PATCH 08/12] Fix the "typing dots" position in latest Gradio version --- css/main.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/main.css b/css/main.css index 9681a5e3..5a79489c 100644 --- a/css/main.css +++ b/css/main.css @@ -527,6 +527,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; background-color: transparent; left: -2px; + top: 4px; padding: var(--block-padding); } From e0e28ecb0bfe9ad7c9d55012f0fec75a03300226 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 28 Mar 2024 15:10:54 -0700 Subject: [PATCH 09/12] Set the gradio 4 allowed_paths --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index c6a01830..4b5185be 100644 --- a/server.py +++ b/server.py @@ -169,7 +169,7 @@ def create_interface(): ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, ssl_certfile=shared.args.ssl_certfile, - allowed_paths=["."] + allowed_paths=["cache", "css", "extensions", "js"] ) From 3ce0d9221b1a0549135cbf3eb81a7bc5b1d64408 Mon Sep 17 00:00:00 2001 From: oobabooga 
<112222186+oobabooga@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:40:31 -0700 Subject: [PATCH 10/12] Bump transformers to 4.39 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index da47bdfc..510a6b17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 19689c97..39b5119b 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 07aa8526..7aef1cdd 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 729bb1d6..8c99dbe6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 3e96ed99..cc46f1a5 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index e0330536..a58e3d18 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 41157d1b..b615f644 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index d8da3136..652cc638 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -22,7 +22,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 72507d1a..63b2956f 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.38.* +transformers==4.39.* tqdm wandb From 9653a9176c779d3946ba459473dfd9e700246b82 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 29 Mar 2024 10:41:24 -0700 Subject: [PATCH 11/12] Minor improvements to Parameters tab --- modules/ui_parameters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index c020b511..7090d01d 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ 
-33,8 +33,8 @@ def create_ui(default_preset): shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') - shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty') shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty') + shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty') shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)") @@ -44,8 +44,8 @@ def create_ui(default_preset): shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') - shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') - shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Specific token IDs to ban from generating, comma-separated. The IDs can be found in the Default or Notebook tab.') + shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') + shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.') shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.') shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 
1.5 is a good value.') From c37f792afacd1e10938e397d8e04d87eafa43874 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 29 Mar 2024 10:54:01 -0700 Subject: [PATCH 12/12] Better way to handle user_bio default in the API (alternative to bdcf31035f06d666f01339c9af2447d6c2b83bcc) --- extensions/openai/completions.py | 2 ++ extensions/openai/typing.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 3bc5170a..5925101a 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -261,6 +261,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) - name2 = body['bot_name'] or name2 context = body['context'] or context greeting = body['greeting'] or greeting + user_bio = body['user_bio'] or '' # History user_input, custom_system_message, history = convert_history(messages) @@ -271,6 +272,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) - 'name2': name2, 'context': context, 'greeting': greeting, + 'user_bio': user_bio, 'instruction_template_str': instruction_template_str, 'custom_system_message': custom_system_message, 'chat_template_str': chat_template_str, diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index dbfa7167..c3ef0404 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -103,7 +103,7 @@ class ChatCompletionRequestParams(BaseModel): context: str | None = Field(default=None, description="Overwrites the value set by character field.") greeting: str | None = Field(default=None, description="Overwrites the value set by character field.") user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1") - user_bio: str | None = Field(default='', description="The user description/personality.") + user_bio: str | None = Field(default=None, description="The user description/personality.") chat_template_str: str | None = Field(default=None, description="Jinja2 template for chat.") chat_instruct_command: str | None = None
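A note on the _js -> js rename in PATCH 01: Gradio 4 renamed the _js keyword of event methods (.click(), .change(), .then(), ...) to js, which is why every call site above changes and why modules/gradio_hijack.py re-binds the event methods for backward compatibility. A minimal, self-contained sketch of that re-binding pattern, reduced from the hijack module (the helper name patch_js_kwarg is illustrative, not from the patch):

    def patch_js_kwarg(component):
        # For every event the component instance exposes (click, change, ...),
        # wrap the bound method so a legacy _js=... keyword is forwarded as
        # the Gradio 4 js=... keyword.
        for event in getattr(component, 'EVENTS', []):
            original = getattr(component, str(event))

            def shim(*args, _js=None, original=original, **kwargs):
                # 'original=original' freezes the current method in the
                # closure, avoiding Python's late-binding loop pitfall.
                if _js:
                    kwargs['js'] = _js
                return original(*args, **kwargs)

            setattr(component, str(event), shim)

After patch_js_kwarg(some_button), a legacy call such as some_button.click(fn, None, None, _js='() => {}') keeps working against Gradio 4.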
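PATCH 01 also simplifies the streaming loop in modules/text_generation.py: the max_updates_second UI throttle is removed, and only the tokens/second cap survives. A minimal sketch of the surviving pacing logic, lifted from that loop into a standalone generator (the name paced is illustrative):

    import time

    def paced(token_stream, max_tokens_second):
        # Yield items no faster than max_tokens_second; 0 disables the cap.
        last_update = time.time()
        for reply in token_stream:
            if max_tokens_second > 0:
                # Sleep off whatever remains of the per-token time budget.
                diff = 1 / max_tokens_second - (time.time() - last_update)
                if diff > 0:
                    time.sleep(diff)
                last_update = time.time()
            yield reply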
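PATCH 02 fixes a pure Python slicing bug: when the generation-prompt suffix is the empty string, -len(suffix) is -0, which Python treats as 0, so prompt[:-len(suffix)] returned the empty string and the entire prompt was discarded. A short demonstration of the failure and of the guard added in modules/chat.py:

    prompt, suffix = "Hello, world", ""

    assert prompt[:-len(suffix)] == ""   # buggy: [:-0] is [:0], i.e. nothing kept

    if len(suffix) > 0:                  # the added guard
        prompt = prompt[:-len(suffix)]
    assert prompt == "Hello, world"      # prompt survives an empty suffix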
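PATCHES 03 and 12 are two takes on the same crash (#5717): user_bio arriving as None. The final approach keeps the API field Optional (default=None) and coalesces in extensions/openai/completions.py with body['user_bio'] or ''. A minimal sketch of why that works (the class name Params is illustrative, not from the patch):

    from pydantic import BaseModel, Field

    class Params(BaseModel):
        user_bio: str | None = Field(default=None, description="The user description/personality.")

    params = Params()
    user_bio = params.user_bio or ''   # None (and '') both coalesce to ''
    assert user_bio == ''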