From 8513028968f810bb5326fb89cb903228b69d5816 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 12 Dec 2023 13:00:38 -0800
Subject: [PATCH] Fix lag in the chat tab during streaming

---
 modules/chat.py            | 10 +++++-----
 modules/text_generation.py |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/modules/chat.py b/modules/chat.py
index 4f0434ba..8756adb3 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -205,7 +205,7 @@ def get_stopping_strings(state):
     return list(set(stopping_strings))
 
 
-def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True):
+def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):
     history = state['history']
     output = copy.deepcopy(history)
     output = apply_extensions('history', output)
@@ -256,7 +256,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
 
     # Generate
     reply = None
-    for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True)):
+    for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)):
 
         # Extract the reply
         visible_reply = reply
@@ -311,7 +311,7 @@ def impersonate_wrapper(text, state):
         return
 
 
-def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_message=True):
+def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False):
     history = state['history']
     if regenerate or _continue:
         text = ''
@@ -319,7 +319,7 @@ def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_
             yield history
             return
 
-    for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message):
+    for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message, for_ui=for_ui):
        yield history
 
 
@@ -351,7 +351,7 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
            send_dummy_message(text, state)
            send_dummy_reply(state['start_with'], state)
 
-    for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True)):
+    for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
        yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history
 
 
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 3815fe70..f576ba83 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -33,7 +33,7 @@ def generate_reply(*args, **kwargs):
        shared.generation_lock.release()
 
 
-def _generate_reply(question, state, stopping_strings=None, is_chat=False, escape_html=False):
+def _generate_reply(question, state, stopping_strings=None, is_chat=False, escape_html=False, for_ui=False):
 
    # Find the appropriate generation function
    generate_func = apply_extensions('custom_generate_reply')
@@ -96,7 +96,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
            # Limit updates to 24 or 5 per second to avoid lag in the Gradio UI
            # API updates are not limited
            else:
-                min_update_interval = 0 if not escape_html else 0.2 if (shared.args.listen or shared.args.share) else 0.0417
+                min_update_interval = 0 if not for_ui else 0.2 if (shared.args.listen or shared.args.share) else 0.0417
                if cur_time - last_update > min_update_interval:
                    last_update = cur_time
                    yield reply
@@ -178,7 +178,7 @@ def generate_reply_wrapper(question, state, stopping_strings=None):
    reply = question if not shared.is_seq2seq else ''
    yield formatted_outputs(reply, shared.model_name)
 
-    for reply in generate_reply(question, state, stopping_strings, is_chat=False, escape_html=True):
+    for reply in generate_reply(question, state, stopping_strings, is_chat=False, escape_html=True, for_ui=True):
        if not shared.is_seq2seq:
            reply = question + reply
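
For context: the patch threads a for_ui flag from the Gradio chat wrappers down to _generate_reply, so only UI-bound streams are throttled (roughly 24 updates/s locally, or 5/s when --listen or --share is used), while API callers keep receiving every partial reply. The snippet below is a minimal standalone sketch of that throttling pattern, not part of the patch; throttled_stream, token_stream, and the remote parameter are hypothetical names introduced only for illustration.

import time

def throttled_stream(token_stream, for_ui=True, remote=False):
    # Hypothetical helper illustrating the same idea as the patched code:
    # API callers (for_ui=False) see every partial reply; UI callers are
    # capped at ~24 updates/s locally or 5/s for remote sessions.
    min_update_interval = 0 if not for_ui else (0.2 if remote else 0.0417)
    last_update = -1
    reply = ''
    for reply in token_stream:
        cur_time = time.time()
        if cur_time - last_update > min_update_interval:
            last_update = cur_time
            yield reply

    # Always emit the final reply so the caller gets the complete text.
    yield reply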