From ad6b699503eeabcad141efb6172ff43dc1976522 Mon Sep 17 00:00:00 2001
From: Xan <70198941+xanthousm@users.noreply.github.com>
Date: Wed, 8 Mar 2023 22:02:17 +1100
Subject: [PATCH 01/33] Better TTS with autoplay

- Adds "still_streaming" to the shared module so extensions can know if generation is complete
- Changes the TTS extension with new options:
  - Show text under the audio widget
  - Automatically play the audio once text generation finishes
  - Manage the generated wav files (only keep files for finished generations, with an optional max file limit)
  - [wip] ability to change voice pitch and speed
- Adds 'tensorboard' to requirements, since Python raised "tensorboard not found" errors after a fresh installation.
---
 extensions/silero_tts/requirements.txt |  1 +
 extensions/silero_tts/script.py        | 60 +++++++++++++++++++++++---
 modules/shared.py                      |  1 +
 modules/text_generation.py             | 11 ++++-
 requirements.txt                       |  1 +
 5 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/extensions/silero_tts/requirements.txt b/extensions/silero_tts/requirements.txt
index f2f0bff5..b4444306 100644
--- a/extensions/silero_tts/requirements.txt
+++ b/extensions/silero_tts/requirements.txt
@@ -4,3 +4,4 @@ pydub
 PyYAML
 torch
 torchaudio
+simpleaudio
diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index f697d0e2..03319dbf 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -4,20 +4,36 @@ from pathlib import Path
 import gradio as gr
 import torch
 
+import modules.shared as shared
+import simpleaudio as sa
+
 torch._C._jit_set_profiling_mode(False)
 
 params = {
     'activate': True,
-    'speaker': 'en_56',
+    'speaker': 'en_5',
     'language': 'en',
     'model_id': 'v3_en',
     'sample_rate': 48000,
     'device': 'cpu',
+    'max_wavs': 20,
+    'play_audio': True,
+    'show_text': True,
 }
 current_params = params.copy()
 voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
 wav_idx = 0
 
+table = str.maketrans({
+    "<": "&lt;",
+    ">": "&gt;",
+    "&": "&amp;",
+    "'": "&apos;",
+    '"': "&quot;",
+})
+def xmlesc(txt):
+    return txt.translate(table)
+
 def load_model():
     model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
     model.to(params['device'])
@@ -58,20 +74,45 @@ def output_modifier(string):
     if params['activate'] == False:
         return string
 
+    orig_string = string
     string = remove_surrounded_chars(string)
     string = string.replace('"', '')
     string = string.replace('“', '')
     string = string.replace('\n', ' ')
     string = string.strip()
 
+    auto_playable=True
     if string == '':
-        string = 'empty reply, try regenerating'
+        string = 'empty reply, try regenerating'
+        auto_playable=False
+
+    #x-slow, slow, medium, fast, x-fast
+    #x-low, low, medium, high, x-high
+    #prosody='<prosody rate="x-slow" pitch="x-low">'
+    prosody=''
+    string ='<speak>'+prosody+xmlesc(string)+'</speak>'
 
     output_file = Path(f'extensions/silero_tts/outputs/{wav_idx:06d}.wav')
-    audio = model.save_wav(text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
-
+    audio = model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
     string = f'<audio src="file/{output_file.as_posix()}" controls></audio>'
-    wav_idx += 1
+
+    #reset if too many wavs. set max to -1 for unlimited.
+    if wav_idx < params['max_wavs'] and params['max_wavs'] > 0:
+        #only increment if starting a new stream, else replace during streaming. Does not update duration on webui sometimes?
+        if not shared.still_streaming:
+            wav_idx += 1
+    else:
+        wav_idx = 0
+
+    if params['show_text']:
+        string+='\n\n'+orig_string
+
+    #if params['play_audio'] == True and auto_playable and shared.stop_everything:
+    if params['play_audio'] == True and auto_playable and not shared.still_streaming:
+        stop_autoplay()
+        wave_obj = sa.WaveObject.from_wave_file(output_file.as_posix())
+        wave_obj.play()
 
     return string
 
@@ -84,11 +125,20 @@ def bot_prefix_modifier(string):
 
     return string
 
+def stop_autoplay():
+    sa.stop_all()
+
 def ui():
     # Gradio elements
     activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
+    show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
+    play_audio = gr.Checkbox(value=params['play_audio'], label='Play TTS automatically')
+    stop_audio = gr.Button("Stop Auto-Play")
     voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice')
 
     # Event functions to update the parameters in the backend
     activate.change(lambda x: params.update({"activate": x}), activate, None)
+    play_audio.change(lambda x: params.update({"play_audio": x}), play_audio, None)
+    show_text.change(lambda x: params.update({"show_text": x}), show_text, None)
+    stop_audio.click(stop_autoplay)
     voice.change(lambda x: params.update({"speaker": x}), voice, None)
diff --git a/modules/shared.py b/modules/shared.py
index e9dfdaa2..90adb320 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -12,6 +12,7 @@ is_LLaMA = False
 history = {'internal': [], 'visible': []}
 character = 'None'
 stop_everything = False
+still_streaming = False
 
 # UI elements (buttons, sliders, HTML, etc)
 gradio = {}
diff --git a/modules/text_generation.py b/modules/text_generation.py
index f9082a31..c9f4fc6a 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -182,6 +182,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
     # Generate the reply 8 tokens at a time
     else:
         yield formatted_outputs(original_question, shared.model_name)
+        shared.still_streaming = True
         for i in tqdm(range(max_new_tokens//8+1)):
             with torch.no_grad():
                 output = eval(f"shared.model.generate({', '.join(generate_params)}){cuda}")[0]
@@ -191,8 +192,7 @@
             reply = decode(output)
             if not (shared.args.chat or shared.args.cai_chat):
                 reply = original_question + apply_extensions(reply[len(question):], "output")
-            yield formatted_outputs(reply, shared.model_name)
-
+
             if not shared.args.flexgen:
                 if output[-1] == n:
                     break
@@ -201,6 +201,13 @@ def generate_reply(question, 
max_new_tokens, do_sample, temperature, top_p, typi
             if np.count_nonzero(input_ids[0] == n) < np.count_nonzero(output == n):
                 break
             input_ids = np.reshape(output, (1, output.shape[0]))
+
+            #Mid-stream yield, ran if no breaks
+            yield formatted_outputs(reply, shared.model_name)
 
             if shared.soft_prompt:
                 inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)
+
+    #Stream finished from max tokens or break. Do final yield.
+    shared.still_streaming = False
+    yield formatted_outputs(reply, shared.model_name)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 55aeb8fd..48ca1e4e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ numpy
 rwkv==0.0.6
 safetensors==0.2.8
 git+https://github.com/huggingface/transformers
+tensorboard

From 738be6dd59a6f9c2ee215093675f2d55111d89ca Mon Sep 17 00:00:00 2001
From: Xan <70198941+xanthousm@users.noreply.github.com>
Date: Wed, 8 Mar 2023 22:25:55 +1100
Subject: [PATCH 02/33] Fix merge errors and unlimited wav bug

---
 extensions/silero_tts/script.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index 53bd554c..eaf56159 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -93,11 +93,11 @@ def output_modifier(string):
     string ='<speak>'+prosody+xmlesc(string)+'</speak>'
 
     output_file = Path(f'extensions/silero_tts/outputs/{wav_idx:06d}.wav')
-    model.save_wav(text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
+    model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
     string = f'<audio src="file/{output_file.as_posix()}" controls></audio>'
 
     #reset if too many wavs. set max to -1 for unlimited.
-    if wav_idx < params['max_wavs'] and params['max_wavs'] > 0:
+    if wav_idx < params['max_wavs'] or params['max_wavs'] < 0:
         #only increment if starting a new stream, else replace during streaming. Does not update duration on webui sometimes?
         if not shared.still_streaming:
             wav_idx += 1

From a2b5383398adc6da5c46811179bfadaefa5e23f7 Mon Sep 17 00:00:00 2001
From: Xan <70198941+xanthousm@users.noreply.github.com>
Date: Thu, 9 Mar 2023 10:48:44 +1100
Subject: [PATCH 03/33] Merge in audio generation only on text stream finish;
 postpone audio-block autoplay

- Keeping simpleaudio until the audio block's "autoplay" no longer plays previous messages
- Only generate audio for finished messages
- Better name for autoplay, clean up comments
- Set default to unlimited wav files.
Still a few bugs when wav id resets Co-Authored-By: Christoph Hess <9931495+ChristophHess@users.noreply.github.com> --- extensions/silero_tts/script.py | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index eaf56159..334b02b9 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -15,14 +15,15 @@ params = { 'model_id': 'v3_en', 'sample_rate': 48000, 'device': 'cpu', - 'max_wavs': 20, - 'play_audio': True, + 'max_wavs': -1, + 'autoplay': True, 'show_text': True, } current_params = params.copy() voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] wav_idx = 0 +#Used for making text xml compatible, needed for voice pitch and speed control table = str.maketrans({ "<": "<", ">": ">", @@ -88,27 +89,32 @@ def output_modifier(string): #x-slow, slow, medium, fast, x-fast #x-low, low, medium, high, x-high - #prosody='' - prosody='' + prosody='' string =''+prosody+xmlesc(string)+'' output_file = Path(f'extensions/silero_tts/outputs/{wav_idx:06d}.wav') - model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) - string = f'' + autoplay_str = '' + if not shared.still_streaming: + model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) + #diabled until autoplay doesn't run on previous messages + #autoplay = 'autoplay' if (params['autoplay'] and auto_playable) else '' + string = f'\n\n' + else: + #placeholder so text doesnt shift around so much + string =f'\n\n' #reset if too many wavs. set max to -1 for unlimited. if wav_idx < params['max_wavs'] or params['max_wavs'] < 0: - #only increment if starting a new stream, else replace during streaming. Does not update duration on webui sometimes? + #only increment if starting a new stream, else replace during streaming. 
if not shared.still_streaming: wav_idx += 1 else: wav_idx = 0 - + if params['show_text']: - string+='\n\n'+orig_string - - #if params['play_audio'] == True and auto_playable and shared.stop_everything: - if params['play_audio'] == True and auto_playable and not shared.still_streaming: + string+=orig_string + + if params['autoplay'] == True and auto_playable and not shared.still_streaming: stop_autoplay() wave_obj = sa.WaveObject.from_wave_file(output_file.as_posix()) wave_obj.play() @@ -131,13 +137,13 @@ def ui(): # Gradio elements activate = gr.Checkbox(value=params['activate'], label='Activate TTS') show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player') - play_audio = gr.Checkbox(value=params['play_audio'], label='Play TTS automatically') + autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically') stop_audio = gr.Button("Stop Auto-Play") voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice') # Event functions to update the parameters in the backend activate.change(lambda x: params.update({"activate": x}), activate, None) - play_audio.change(lambda x: params.update({"play_audio": x}), play_audio, None) + autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None) show_text.change(lambda x: params.update({"show_text": x}), show_text, None) stop_audio.click(stop_autoplay) voice.change(lambda x: params.update({"speaker": x}), voice, None) From 0dfac4b777009d415d848c2f0bc718ec1bbac7e5 Mon Sep 17 00:00:00 2001 From: Xan <70198941+xanthousm@users.noreply.github.com> Date: Sat, 11 Mar 2023 16:34:59 +1100 Subject: [PATCH 04/33] Working html autoplay, clean up, improve wav naming - New autoplay using html tag, removed from old message when new input provided - Add voice pitch and speed control - Group settings together - Use name + conversation history to match wavs to messages, minimize problems when changing characters Current minor bugs: - Gradio seems to cache the audio files, so using "clear history" and generating new messages will play the old audio (the new messages are saving correctly). Gradio will clear cache and use correct audio after a few messages or after a page refresh. - Switching characters does not immediately update the message ID used for the audio. 
ID is updated after the first new message, but that message will use the wrong ID --- extensions/silero_tts/requirements.txt | 1 - extensions/silero_tts/script.py | 79 +++++++++++++------------- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/extensions/silero_tts/requirements.txt b/extensions/silero_tts/requirements.txt index b4444306..f2f0bff5 100644 --- a/extensions/silero_tts/requirements.txt +++ b/extensions/silero_tts/requirements.txt @@ -4,4 +4,3 @@ pydub PyYAML torch torchaudio -simpleaudio diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index 334b02b9..b66963e2 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -4,7 +4,6 @@ import gradio as gr import torch import modules.shared as shared -import simpleaudio as sa torch._C._jit_set_profiling_mode(False) @@ -15,13 +14,16 @@ params = { 'model_id': 'v3_en', 'sample_rate': 48000, 'device': 'cpu', - 'max_wavs': -1, - 'autoplay': True, 'show_text': True, + 'autoplay': True, + 'voice_pitch': 'medium', + 'voice_speed': 'medium', } current_params = params.copy() voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] -wav_idx = 0 +voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] +voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast'] +last_msg_id = 0 #Used for making text xml compatible, needed for voice pitch and speed control table = str.maketrans({ @@ -55,6 +57,14 @@ def input_modifier(string): This function is applied to your text inputs before they are fed into the model. """ + #remove autoplay from previous + if len(shared.history['internal'])>0: + [text, reply] = shared.history['internal'][-1] + [visible_text, visible_reply] = shared.history['visible'][-1] + rep_clean = reply.replace('controls autoplay>','controls>') + vis_rep_clean = visible_reply.replace('controls autoplay>','controls>') + shared.history['internal'][-1] = [text, rep_clean] + shared.history['visible'][-1] = [visible_text, vis_rep_clean] return string @@ -63,7 +73,7 @@ def output_modifier(string): This function is applied to the model outputs. 
""" - global wav_idx, model, current_params + global model, current_params for i in params: if params[i] != current_params[i]: @@ -81,44 +91,31 @@ def output_modifier(string): string = string.replace('\n', ' ') string = string.strip() - auto_playable=True + silent_string = False #Used to prevent unnecessary audio file generation if string == '': string = 'empty reply, try regenerating' - auto_playable=False - + silent_string = True #x-slow, slow, medium, fast, x-fast #x-low, low, medium, high, x-high - prosody='' + pitch = params['voice_pitch'] + speed = params['voice_speed'] + prosody=f'' string =''+prosody+xmlesc(string)+'' - - output_file = Path(f'extensions/silero_tts/outputs/{wav_idx:06d}.wav') - autoplay_str = '' - if not shared.still_streaming: + + current_msg_id=len(shared.history['visible'])#check length here, since output_modifier can run many times on the same message + output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav') + if not shared.still_streaming and not silent_string: model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) - #diabled until autoplay doesn't run on previous messages - #autoplay = 'autoplay' if (params['autoplay'] and auto_playable) else '' - string = f'\n\n' + string = f'\n\n' else: - #placeholder so text doesnt shift around so much - string =f'\n\n' - - #reset if too many wavs. set max to -1 for unlimited. - if wav_idx < params['max_wavs'] or params['max_wavs'] < 0: - #only increment if starting a new stream, else replace during streaming. - if not shared.still_streaming: - wav_idx += 1 - else: - wav_idx = 0 + #placeholder so text doesn't shift around so much + string ='\n\n' if params['show_text']: + #string+=f'*[{current_msg_id}]:*'+orig_string #Debug, looks like there is a delay in "current_msg_id" being updated when switching characters (updates after new message sent). Can't find the source. "shared.character" is updating properly. 
string+=orig_string - if params['autoplay'] == True and auto_playable and not shared.still_streaming: - stop_autoplay() - wave_obj = sa.WaveObject.from_wave_file(output_file.as_posix()) - wave_obj.play() - return string def bot_prefix_modifier(string): @@ -130,20 +127,20 @@ def bot_prefix_modifier(string): return string -def stop_autoplay(): - sa.stop_all() - def ui(): # Gradio elements - activate = gr.Checkbox(value=params['activate'], label='Activate TTS') - show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player') - autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically') - stop_audio = gr.Button("Stop Auto-Play") - voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice') + with gr.Accordion("Silero TTS"): + activate = gr.Checkbox(value=params['activate'], label='Activate TTS') + show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player') + autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically') + voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice') + v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch') + v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed') # Event functions to update the parameters in the backend activate.change(lambda x: params.update({"activate": x}), activate, None) - autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None) show_text.change(lambda x: params.update({"show_text": x}), show_text, None) - stop_audio.click(stop_autoplay) + autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None) voice.change(lambda x: params.update({"speaker": x}), voice, None) + v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None) + v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None) From b8f7d34c1df5b12e60491e4c8a6494d5e6aec20e Mon Sep 17 00:00:00 2001 From: Xan <70198941+xanthousm@users.noreply.github.com> Date: Sat, 11 Mar 2023 17:05:09 +1100 Subject: [PATCH 05/33] Undo changes to requirements needing to manually install tensorboard might be a windows-only problem. Can be easily solved manually. 
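As an aside before the diff below: the SSML assembly that patches 01 through 04 build up inside output_modifier is easy to exercise outside the web UI. The following is a minimal, self-contained sketch of that escaping-plus-prosody step; the build_ssml helper name is ours rather than the extension's, and unlike the patches it closes the prosody tag explicitly, which Silero also accepts.

# Minimal sketch of the escaping + prosody wrapping used by the silero_tts patches.
# build_ssml is a hypothetical helper; the escape table mirrors the one in script.py.
table = str.maketrans({
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&apos;",
    '"': "&quot;",
})

def xmlesc(txt):
    # Make the reply XML-safe before embedding it in SSML markup
    return txt.translate(table)

def build_ssml(text, speed='medium', pitch='medium'):
    # Valid rates:   x-slow, slow, medium, fast, x-fast
    # Valid pitches: x-low, low, medium, high, x-high
    return f'<speak><prosody rate="{speed}" pitch="{pitch}">{xmlesc(text)}</prosody></speak>'

print(build_ssml('She said "hi" & left', speed='fast'))
# <speak><prosody rate="fast" pitch="medium">She said &quot;hi&quot; &amp; left</prosody></speak>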
--- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a8a6eada..47c56a45 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,5 @@ gradio==3.18.0 numpy rwkv==0.1.0 safetensors==0.2.8 -tensorboard sentencepiece git+https://github.com/oobabooga/transformers@llama_push From 8f8da6707d7e71c2eef01c2d33ca6623cebf080c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 11 Mar 2023 11:17:13 -0300 Subject: [PATCH 06/33] Minor style changes to silero_tts --- extensions/silero_tts/script.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index b66963e2..7e63d8b7 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -14,18 +14,19 @@ params = { 'model_id': 'v3_en', 'sample_rate': 48000, 'device': 'cpu', - 'show_text': True, + 'show_text': False, 'autoplay': True, 'voice_pitch': 'medium', 'voice_speed': 'medium', } + current_params = params.copy() voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast'] last_msg_id = 0 -#Used for making text xml compatible, needed for voice pitch and speed control +# Used for making text xml compatible, needed for voice pitch and speed control table = str.maketrans({ "<": "<", ">": ">", @@ -33,6 +34,7 @@ table = str.maketrans({ "'": "'", '"': """, }) + def xmlesc(txt): return txt.translate(table) @@ -57,7 +59,8 @@ def input_modifier(string): This function is applied to your text inputs before they are fed into the model. 
""" - #remove autoplay from previous + + # Remove autoplay from previous if len(shared.history['internal'])>0: [text, reply] = shared.history['internal'][-1] [visible_text, visible_reply] = shared.history['visible'][-1] @@ -91,30 +94,30 @@ def output_modifier(string): string = string.replace('\n', ' ') string = string.strip() - silent_string = False #Used to prevent unnecessary audio file generation + silent_string = False # Used to prevent unnecessary audio file generation if string == '': - string = 'empty reply, try regenerating' - silent_string = True + string = 'empty reply, try regenerating' + silent_string = True - #x-slow, slow, medium, fast, x-fast - #x-low, low, medium, high, x-high + # x-slow, slow, medium, fast, x-fast + # x-low, low, medium, high, x-high pitch = params['voice_pitch'] speed = params['voice_speed'] prosody=f'' - string =''+prosody+xmlesc(string)+'' + string = ''+prosody+xmlesc(string)+'' - current_msg_id=len(shared.history['visible'])#check length here, since output_modifier can run many times on the same message + current_msg_id = len(shared.history['visible']) # Check length here, since output_modifier can run many times on the same message output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav') if not shared.still_streaming and not silent_string: model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) string = f'\n\n' else: - #placeholder so text doesn't shift around so much - string ='\n\n' + # Placeholder so text doesn't shift around so much + string = '\n\n' if params['show_text']: - #string+=f'*[{current_msg_id}]:*'+orig_string #Debug, looks like there is a delay in "current_msg_id" being updated when switching characters (updates after new message sent). Can't find the source. "shared.character" is updating properly. - string+=orig_string + #string += f'*[{current_msg_id}]:*'+orig_string #Debug, looks like there is a delay in "current_msg_id" being updated when switching characters (updates after new message sent). Can't find the source. "shared.character" is updating properly. + string += orig_string return string From d4afed4e44a748c22d9fa97edb3f818ae8af191f Mon Sep 17 00:00:00 2001 From: Xan <70198941+xanthousm@users.noreply.github.com> Date: Sun, 12 Mar 2023 17:56:57 +1100 Subject: [PATCH 07/33] Fixes and polish - Change wav naming to be completely unique using timestamp instead of message ID, stops browser using cached audio when new audio is made with the same file name (eg after regenerate or clear history). - Make the autoplay setting actually disable autoplay. - Make Settings panel a bit more compact. - Hide html errors when audio file of chat history is missing. - Add button to permanently convert TTS history to normal text messages - Changed the "show message text" toggle to affect the chat history. 
---
 extensions/silero_tts/script.py | 89 ++++++++++++++++++++++++++-------
 1 file changed, 72 insertions(+), 17 deletions(-)

diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index 7e63d8b7..1a60c901 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -2,8 +2,10 @@ from pathlib import Path
 
 import gradio as gr
 import torch
-
+import time
+import re
 import modules.shared as shared
+import modules.chat as chat
 
 torch._C._jit_set_profiling_mode(False)
 
@@ -54,19 +56,57 @@ def remove_surrounded_chars(string):
             new_string += char
     return new_string
 
+def remove_tts_from_history():
+    suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else ''
+    for i, entry in enumerate(shared.history['internal']):
+        reply = entry[1]
+        reply = re.sub("(<USER>|<user>|{{user}})", shared.settings[f'name1{suffix}'], reply)
+        if shared.args.chat:
+            reply = reply.replace('\n', '<br>')
+        shared.history['visible'][i][1] = reply
+
+    if shared.args.cai_chat:
+        return chat.generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name1{suffix}'], shared.character)
+    else:
+        return shared.history['visible']
+
+def toggle_text_in_history():
+    suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else ''
+    audio_str='\n\n' # The '\n\n' used after </audio>
+    if shared.args.chat:
+        audio_str='<br><br>'
+
+    if params['show_text']==True:
+        #for i, entry in enumerate(shared.history['internal']):
+        for i, entry in enumerate(shared.history['visible']):
+            vis_reply = entry[1]
+            if vis_reply.startswith('<audio'):
+                reply = shared.history['internal'][i][1]
+                reply = re.sub("(<USER>|<user>|{{user}})", shared.settings[f'name1{suffix}'], reply)
+                if shared.args.chat:
+                    reply = reply.replace('\n', '<br>')
+                shared.history['visible'][i][1] = vis_reply.split(audio_str,1)[0]+audio_str+reply
+    else:
+        for i, entry in enumerate(shared.history['visible']):
+            vis_reply = entry[1]
+            if vis_reply.startswith('<audio'):
+                shared.history['visible'][i][1] = vis_reply.split(audio_str,1)[0]+audio_str
+
+    if shared.args.cai_chat:
+        return chat.generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name1{suffix}'], shared.character)
+    else:
+        return shared.history['visible']
+
 def input_modifier(string):
     """
     This function is applied to your text inputs before
     they are fed into the model.
     """
 
-    # Remove autoplay from previous
-    if len(shared.history['internal'])>0:
-        [text, reply] = shared.history['internal'][-1]
+    # Remove autoplay from previous chat history
+    if (shared.args.chat or shared.args.cai_chat)and len(shared.history['internal'])>0:
         [visible_text, visible_reply] = shared.history['visible'][-1]
-        rep_clean = reply.replace('controls autoplay>','controls>')
         vis_rep_clean = visible_reply.replace('controls autoplay>','controls>')
-        shared.history['internal'][-1] = [text, rep_clean]
         shared.history['visible'][-1] = [visible_text, vis_rep_clean]
 
     return string
@@ -99,24 +139,21 @@ def output_modifier(string):
         string = 'empty reply, try regenerating'
         silent_string = True
 
-    # x-slow, slow, medium, fast, x-fast
-    # x-low, low, medium, high, x-high
     pitch = params['voice_pitch']
     speed = params['voice_speed']
     prosody=f'<prosody rate="{speed}" pitch="{pitch}">'
     string = '<speak>'+prosody+xmlesc(string)+'</speak>'
 
-    current_msg_id = len(shared.history['visible']) # Check length here, since output_modifier can run many times on the same message
-    output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav')
     if not shared.still_streaming and not silent_string:
+        output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav')
         model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
-        string = f'<audio src="file/{output_file.as_posix()}" controls autoplay></audio>\n\n'
+        autoplay_str = ' autoplay' if params['autoplay'] else ''
+        string = f'<audio src="file/{output_file.as_posix()}" controls{autoplay_str}></audio>\n\n'
     else:
         # Placeholder so text doesn't shift around so much
         string = '<audio controls></audio>\n\n'
 
     if params['show_text']:
-        #string += f'*[{current_msg_id}]:*'+orig_string #Debug, looks like there is a delay in "current_msg_id" being updated when switching characters (updates after new message sent). Can't find the source. "shared.character" is updating properly.
string += orig_string return string @@ -133,16 +170,34 @@ def bot_prefix_modifier(string): def ui(): # Gradio elements with gr.Accordion("Silero TTS"): - activate = gr.Checkbox(value=params['activate'], label='Activate TTS') + with gr.Row(): + activate = gr.Checkbox(value=params['activate'], label='Activate TTS') + autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically') show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player') - autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically') voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice') - v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch') - v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed') + with gr.Row(): + v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch') + v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed') + with gr.Row(): + convert = gr.Button('Permanently replace chat history audio with message text') + convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False) + convert_cancel = gr.Button('Cancel', visible=False) + + # Convert history with confirmation + convert_arr = [convert_confirm, convert, convert_cancel] + convert.click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr) + convert_confirm.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) + convert_confirm.click(remove_tts_from_history, [], shared.gradio['display']) + convert_confirm.click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + convert_cancel.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) + + # Toggle message text in history + show_text.change(lambda x: params.update({"show_text": x}), show_text, None) + show_text.change(toggle_text_in_history, [], shared.gradio['display']) + show_text.change(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) # Event functions to update the parameters in the backend activate.change(lambda x: params.update({"activate": x}), activate, None) - show_text.change(lambda x: params.update({"show_text": x}), show_text, None) autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None) voice.change(lambda x: params.update({"speaker": x}), voice, None) v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None) From 9276af3561df4d6b25cadc85dd9e51fe167fe807 Mon Sep 17 00:00:00 2001 From: Xan <70198941+xanthousm@users.noreply.github.com> Date: Sun, 12 Mar 2023 19:06:24 +1100 Subject: [PATCH 08/33] clean up --- .idea/workspace.xml | 64 --------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index 404920a8..00000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1678590722207 - - - - \ No newline at end of file From 4dc1d8c091461de4489b29660930ae929d60b171 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 12:46:53 -0300 Subject: [PATCH 09/33] Update README.md --- README.md | 10 +++------- 
1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 3d0c3a23..95fb85ae 100644 --- a/README.md +++ b/README.md @@ -179,14 +179,10 @@ Check the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/System- Pull requests, suggestions, and issue reports are welcome. -Before reporting a bug, make sure that you have created a conda environment and installed the dependencies exactly as in the *Installation* section above. +Before reporting a bug, make sure that you have: -These issues are known: - -* 8-bit doesn't work properly on Windows or older GPUs. -* DeepSpeed doesn't work properly on Windows. - -For these two, please try commenting on an existing issue instead of creating a new one. +1. Created a conda environment and installed the dependencies exactly as in the *Installation* section above. +2. [Searched](https://github.com/oobabooga/text-generation-webui/issues) to see if an issue already exists for the issue you encountered. ## Credits From 4066ab4c0ca608bc4f95f50fe7c7f11334192946 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 13:36:18 -0300 Subject: [PATCH 10/33] Reorder the imports --- extensions/silero_tts/script.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index 1a60c901..62d4b441 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -1,11 +1,12 @@ +import re +import time from pathlib import Path import gradio as gr import torch -import time -import re -import modules.shared as shared + import modules.chat as chat +import modules.shared as shared torch._C._jit_set_profiling_mode(False) From 441e993c51ac100fa4565419791cd9a88fd8d3df Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 14:25:14 -0300 Subject: [PATCH 11/33] Bump accelerate, RWKV and safetensors --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index ceaa0b70..b078ecf4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -accelerate==0.16.0 +accelerate==0.17.0 bitsandbytes==0.37.0 flexgen==0.1.7 gradio==3.18.0 numpy requests -rwkv==0.1.0 -safetensors==0.2.8 +rwkv==0.3.1 +safetensors==0.3.0 sentencepiece tqdm git+https://github.com/zphang/transformers@llama_push From 17210ff88f55dc650c3dc2ff1c8692f27734851c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 14:31:24 -0300 Subject: [PATCH 12/33] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 95fb85ae..26e70d76 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * [FlexGen offload](https://github.com/oobabooga/text-generation-webui/wiki/FlexGen). * [DeepSpeed ZeRO-3 offload](https://github.com/oobabooga/text-generation-webui/wiki/DeepSpeed). * Get responses via API, [with](https://github.com/oobabooga/text-generation-webui/blob/main/api-example-streaming.py) or [without](https://github.com/oobabooga/text-generation-webui/blob/main/api-example.py) streaming. -* [Supports the LLaMA model](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model). 
+* [Supports the LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model). * [Supports the RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model). * Supports softprompts. * [Supports extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). From c7aa51faa6488f019447c7f2eba26013105281e7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 14:54:58 -0300 Subject: [PATCH 13/33] Use a list of eos_tokens instead of just a number This might be the cause of LLaMA ramblings that some people have experienced. --- modules/text_generation.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 6f53e416..7cf68c06 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -119,7 +119,9 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi original_input_ids = input_ids output = input_ids[0] cuda = "" if (shared.args.cpu or shared.args.deepspeed or shared.args.flexgen) else ".cuda()" - n = shared.tokenizer.eos_token_id if eos_token is None else int(encode(eos_token)[0][-1]) + eos_token_ids = [shared.tokenizer.eos_token_id] + if eos_token is not None: + eos_token_ids.append(int(encode(eos_token)[0][-1])) stopping_criteria_list = transformers.StoppingCriteriaList() if stopping_string is not None: # Copied from https://github.com/PygmalionAI/gradio-ui/blob/master/src/model.py @@ -129,7 +131,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi if not shared.args.flexgen: generate_params = [ f"max_new_tokens=max_new_tokens", - f"eos_token_id={n}", + f"eos_token_id={eos_token_ids}", f"stopping_criteria=stopping_criteria_list", f"do_sample={do_sample}", f"temperature={temperature}", @@ -149,7 +151,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi f"max_new_tokens={max_new_tokens if shared.args.no_stream else 8}", f"do_sample={do_sample}", f"temperature={temperature}", - f"stop={n}", + f"stop={eos_token_ids[-1]}", ] if shared.args.deepspeed: generate_params.append("synced_gpus=True") @@ -198,7 +200,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi if not (shared.args.chat or shared.args.cai_chat): reply = original_question + apply_extensions(reply[len(question):], "output") - if output[-1] == n: + if output[-1] in eos_token_ids: break yield formatted_outputs(reply, shared.model_name) @@ -219,7 +221,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi if not (shared.args.chat or shared.args.cai_chat): reply = original_question + apply_extensions(reply[len(question):], "output") - if np.count_nonzero(input_ids[0] == n) < np.count_nonzero(output == n): + if np.count_nonzero(np.isin(input_ids[0], eos_token_ids)) < np.count_nonzero(np.isin(output, eos_token_ids)): break yield formatted_outputs(reply, shared.model_name) From 3375eaece0851b318a7d77fade12ac6a264c6b64 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 15:01:32 -0300 Subject: [PATCH 14/33] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 26e70d76..dc5ed659 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ Optionally, you can use the following command-line flags: | `--cpu` | Use the CPU to generate 
text.| | `--load-in-8bit` | Load the model with 8-bit precision.| | `--load-in-4bit` | Load the model with 4-bit precision. Currently only works with LLaMA.| -| `--gptq-bits` | Load a pre-quantized model with specified precision. 2, 3, 4 and 8bit are supported. Currently only works with LLaMA. | +| `--gptq-bits GPTQ_BITS` | Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA. | | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | | `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.| | `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. | From 4bcd675ccdd3e2ef5e83c4ca2e709e4e0197de02 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 15:23:33 -0300 Subject: [PATCH 15/33] Add *Is typing...* to regenerate as well --- modules/chat.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 2048e2c5..f727ce8b 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -22,6 +22,12 @@ def clean_chat_message(text): text = text.strip() return text +def generate_chat_output(history, name1, name2, character): + if shared.args.cai_chat: + return generate_chat_html(history, name1, name2, character) + else: + return history + def generate_chat_prompt(user_input, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=False): user_input = clean_chat_message(user_input) rows = [f"{context.strip()}\n"] @@ -182,21 +188,18 @@ def cai_chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typ def regenerate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): if (shared.character != 'None' and len(shared.history['visible']) == 1) or len(shared.history['internal']) == 0: - if shared.args.cai_chat: - yield generate_chat_html(shared.history['visible'], name1, name2, shared.character) - else: - yield shared.history['visible'] + yield generate_chat_output(shared.history['visible'], name1, name2, shared.character) else: last_visible = shared.history['visible'].pop() last_internal = shared.history['internal'].pop() + yield generate_chat_output(shared.history['visible']+[[last_visible[0], '*Is typing...*']], name1, name2, shared.character) for _history in chatbot_wrapper(last_internal[0], max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts, regenerate=True): if shared.args.cai_chat: shared.history['visible'][-1] = [last_visible[0], _history[-1][1]] - yield generate_chat_html(shared.history['visible'], name1, name2, shared.character) else: shared.history['visible'][-1] = (last_visible[0], _history[-1][1]) - yield shared.history['visible'] + yield generate_chat_output(shared.history['visible'], name1, name2, shared.character) def remove_last_message(name1, name2): if len(shared.history['visible']) > 0 and not shared.history['internal'][-1][0] == '<|BEGIN-VISIBLE-CHAT|>': @@ -204,6 +207,7 @@ def remove_last_message(name1, name2): shared.history['internal'].pop() else: 
last = ['', ''] + if shared.args.cai_chat: return generate_chat_html(shared.history['visible'], name1, name2, shared.character), last[0] else: @@ -223,10 +227,7 @@ def replace_last_reply(text, name1, name2): shared.history['visible'][-1] = (shared.history['visible'][-1][0], text) shared.history['internal'][-1][1] = apply_extensions(text, "input") - if shared.args.cai_chat: - return generate_chat_html(shared.history['visible'], name1, name2, shared.character) - else: - return shared.history['visible'] + return generate_chat_output(shared.history['visible'], name1, name2, shared.character) def clear_html(): return generate_chat_html([], "", "", shared.character) @@ -246,10 +247,8 @@ def clear_chat_log(name1, name2): else: shared.history['internal'] = [] shared.history['visible'] = [] - if shared.args.cai_chat: - return generate_chat_html(shared.history['visible'], name1, name2, shared.character) - else: - return shared.history['visible'] + + return generate_chat_output(shared.history['visible'], name1, name2, shared.character) def redraw_html(name1, name2): return generate_chat_html(shared.history['visible'], name1, name2, shared.character) From cebe8b390da2dc118964fc224e1ce3b3cc572f87 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 15:50:38 -0300 Subject: [PATCH 16/33] Remove useless "substring_found" variable --- modules/chat.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index f727ce8b..47398afc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -59,7 +59,6 @@ def generate_chat_prompt(user_input, max_new_tokens, name1, name2, context, chat def extract_message_from_reply(question, reply, name1, name2, check, impersonate=False): next_character_found = False - substring_found = False asker = name1 if not impersonate else name2 replier = name2 if not impersonate else name1 @@ -85,15 +84,15 @@ def extract_message_from_reply(question, reply, name1, name2, check, impersonate next_character_found = True reply = clean_chat_message(reply) - # Detect if something like "\nYo" is generated just before - # "\nYou:" is completed - tmp = f"\n{asker}:" - for j in range(1, len(tmp)): - if reply[-j:] == tmp[:j]: + # If something like "\nYo" is generated just before "\nYou:" + # is completed, trim it + next_turn = f"\n{asker}:" + for j in range(len(next_turn)-1, 0, -1): + if reply[-j:] == next_turn[:j]: reply = reply[:-j] - substring_found = True + break - return reply, next_character_found, substring_found + return reply, next_character_found def stop_everything_event(): shared.stop_everything = True @@ -137,7 +136,7 @@ def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical for reply in generate_reply(f"{prompt}{' ' if len(reply) > 0 else ''}{reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name1}:"): # Extracting the reply - reply, next_character_found, substring_found = extract_message_from_reply(prompt, reply, name1, name2, check) + reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check) visible_reply = re.sub("(||{{user}})", name1_original, reply) visible_reply = apply_extensions(visible_reply, "output") if shared.args.chat: @@ -154,7 +153,7 @@ def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, 
typical shared.history['internal'][-1] = [text, reply] shared.history['visible'][-1] = [visible_text, visible_reply] - if not substring_found and not shared.args.no_stream: + if not shared.args.no_stream: yield shared.history['visible'] if next_character_found: break @@ -175,9 +174,8 @@ def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typ reply = '' for i in range(chat_generation_attempts): for reply in generate_reply(prompt+reply, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"): - reply, next_character_found, substring_found = extract_message_from_reply(prompt, reply, name1, name2, check, impersonate=True) - if not substring_found: - yield reply + reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check, impersonate=True) + yield reply if next_character_found: break yield reply From 54e8f0c31f8dfcba0300ab8a1d9861214b40be6f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 16:58:00 -0300 Subject: [PATCH 17/33] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dc5ed659..1e9a093c 100644 --- a/README.md +++ b/README.md @@ -62,9 +62,9 @@ conda install pytorch torchvision torchaudio git -c pytorch ## Installation option 2: one-click installers -[oobabooga-windows.zip](https://github.com/oobabooga/text-generation-webui/releases/download/installers/oobabooga-windows.zip) +[oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) -[oobabooga-linux.zip](https://github.com/oobabooga/text-generation-webui/releases/download/installers/oobabooga-linux.zip) +[oobabooga-linux.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-linux.zip) Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. 
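Related to patch 16 above: the extract_message_from_reply change trims the tail of a streamed reply whenever it is a prefix of the next-turn marker, so a fragment like "\nYo" never flashes in the UI before "\nYou:" finishes generating. A self-contained illustration of that idea; the function name and sample strings are ours, not the webui's.

def trim_partial_stop(reply, asker='You'):
    # Drop the reply's tail if it is a prefix of the next-turn marker,
    # e.g. '\nYo' while waiting for '\nYou:' to complete.
    next_turn = f"\n{asker}:"
    for j in range(len(next_turn) - 1, 0, -1):
        if reply[-j:] == next_turn[:j]:
            return reply[:-j]
    return reply

print(repr(trim_partial_stop('Sure, I can help.\nYo')))  # 'Sure, I can help.'
print(repr(trim_partial_stop('Sure, I can help.')))      # unchanged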
From d168b6e1f708f13292a0a7428950e3b42ee41d8a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 17:54:07 -0300 Subject: [PATCH 18/33] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 1e9a093c..89b567f2 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,9 @@ pip3 install torch torchvision torchaudio --extra-index-url https://download.pyt conda install pytorch torchvision torchaudio git -c pytorch ``` +See also: [Installation instructions for human beings +](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings) + ## Installation option 2: one-click installers [oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) From a95592fc56929fe1ba55ec30b41800de614bb4fd Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 20:38:40 -0300 Subject: [PATCH 19/33] Add back a progress indicator to --no-stream --- server.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server.py b/server.py index 47e9c8ed..08b1a478 100644 --- a/server.py +++ b/server.py @@ -269,10 +269,10 @@ if shared.args.chat or shared.args.cai_chat: function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' - gen_events.append(shared.gradio['Generate'].click(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=False, api_name='textgen')) - gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=False)) - gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=False)) - gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=False)) + gen_events.append(shared.gradio['Generate'].click(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream, api_name='textgen')) + gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream)) shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream) From 1ddcd4d0ba27a3a5f77a52a3169d01dea070b65d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 12 Mar 2023 23:42:49 -0300 Subject: [PATCH 20/33] Clean up silero_tts This should only be used with --no-stream. The shared.still_streaming implementation was faulty by design: output_modifier should never be called when streaming is already over. 
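To make the design point in this commit message concrete: during streaming, output_modifier runs on every partial reply, so any side effect inside it (such as writing a wav file) repeats once per chunk; the hook is only well-defined if it fires once, on the finished text. Below is a simplified model of the two call patterns. The names are stand-ins for the webui's generator and extension machinery, not its actual API.

def fake_stream(chunks):
    # Stand-in for the incremental generator in text_generation.py
    text = ''
    for chunk in chunks:
        text += chunk
        yield text

def output_modifier(reply):
    # Expensive side effect (e.g. synthesizing audio); must run exactly once
    print(f'TTS generated for: {reply!r}')
    return reply

final = ''
for partial in fake_stream(['Hel', 'lo ', 'there']):
    final = partial   # streaming UI updates use the partial text only

output_modifier(final)  # the extension hook fires once, on the finished reply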
---
 extensions/silero_tts/script.py | 99 +++++++++++----------------------
 modules/shared.py               |  1 -
 modules/text_generation.py      |  4 --
 3 files changed, 31 insertions(+), 73 deletions(-)

diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index 62d4b441..4a02abaa 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -1,4 +1,4 @@
-import re
+import os
 import time
 from pathlib import Path
 
@@ -12,7 +12,7 @@ torch._C._jit_set_profiling_mode(False)
 
 params = {
     'activate': True,
-    'speaker': 'en_5',
+    'speaker': 'en_56',
     'language': 'en',
     'model_id': 'v3_en',
     'sample_rate': 48000,
@@ -27,7 +27,6 @@ current_params = params.copy()
 voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
 voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
 voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
-last_msg_id = 0
 
 # Used for making text xml compatible, needed for voice pitch and speed control
 table = str.maketrans({
@@ -57,46 +56,21 @@ def remove_surrounded_chars(string):
             new_string += char
     return new_string
 
-def remove_tts_from_history():
-    suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else ''
+def remove_tts_from_history(name1, name2):
     for i, entry in enumerate(shared.history['internal']):
-        reply = entry[1]
-        reply = re.sub("(<USER>|<user>|{{user}})", shared.settings[f'name1{suffix}'], reply)
-        if shared.args.chat:
-            reply = reply.replace('\n', '<br>')
-        shared.history['visible'][i][1] = reply
+        shared.history['visible'][i][1] = entry[1]
+    return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
 
-    if shared.args.cai_chat:
-        return chat.generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name1{suffix}'], shared.character)
-    else:
-        return shared.history['visible']
-
-def toggle_text_in_history():
-    suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else ''
-    audio_str='\n\n' # The '\n\n' used after </audio>
-    if shared.args.chat:
-        audio_str='<br><br>'
-
-    if params['show_text']==True:
-        #for i, entry in enumerate(shared.history['internal']):
-        for i, entry in enumerate(shared.history['visible']):
-            vis_reply = entry[1]
-            if vis_reply.startswith('<audio'):
-                reply = shared.history['internal'][i][1]
-                reply = re.sub("(<USER>|<user>|{{user}})", shared.settings[f'name1{suffix}'], reply)
-                if shared.args.chat:
-                    reply = reply.replace('\n', '<br>')
-                shared.history['visible'][i][1] = vis_reply.split(audio_str,1)[0]+audio_str+reply
-    else:
-        for i, entry in enumerate(shared.history['visible']):
-            vis_reply = entry[1]
-            if vis_reply.startswith('<audio'):
-                shared.history['visible'][i][1] = vis_reply.split(audio_str,1)[0]+audio_str
+def toggle_text_in_history(name1, name2):
+    for i, entry in enumerate(shared.history['visible']):
+        visible_reply = entry[1]
+        if visible_reply.startswith('<audio'):
+            if params['show_text']:
+                reply = shared.history['internal'][i][1]
+                shared.history['visible'][i][1] = f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"
+            else:
+                shared.history['visible'][i][1] = f"{visible_reply.split('</audio>')[0]}</audio>"
+    return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
 
 def input_modifier(string):
     """
@@ -104,11 +78,9 @@ def input_modifier(string):
     they are fed into the model.
     """
 
-    # Remove autoplay from previous chat history
-    if (shared.args.chat or shared.args.cai_chat)and len(shared.history['internal'])>0:
-        [visible_text, visible_reply] = shared.history['visible'][-1]
-        vis_rep_clean = visible_reply.replace('controls autoplay>','controls>')
-        shared.history['visible'][-1] = [visible_text, vis_rep_clean]
+    # Remove autoplay from the last reply
+    if (shared.args.chat or shared.args.cai_chat) and len(shared.history['internal']) > 0:
+        shared.history['visible'][-1][1] = shared.history['visible'][-1][1].replace('controls autoplay>','controls>')
 
     return string
 
@@ -128,34 +100,25 @@ def output_modifier(string):
     if params['activate'] == False:
         return string
 
-    orig_string = string
+    original_string = string
     string = remove_surrounded_chars(string)
     string = string.replace('"', '')
    string = string.replace('“', '')
     string = string.replace('\n', ' ')
     string = string.strip()
-    silent_string = False # Used to prevent unnecessary audio file generation
 
     if string == '':
-        string = 'empty reply, try regenerating'
-        silent_string = True
-
-    pitch = params['voice_pitch']
-    speed = params['voice_speed']
-    prosody=f'<prosody rate="{speed}" pitch="{pitch}">'
-    string = '<speak>'+prosody+xmlesc(string)+'</prosody></speak>'
-
-    if not shared.still_streaming and not silent_string:
-        output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav')
-        model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
-        autoplay_str = ' autoplay' if params['autoplay'] else ''
-        string = f'<audio src="file/{output_file.as_posix()}" controls{autoplay_str}></audio>\n\n'
+        string = '*Empty reply, try regenerating*'
     else:
-        # Placeholder so text doesn't shift around so much
-        string = '<audio controls></audio>\n\n'
+        output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav')
+        prosody = '<prosody rate="{}" pitch="{}">'.format(params['voice_speed'], params['voice_pitch'])
+        silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
+        model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=os.path.abspath(output_file))
 
-    if params['show_text']:
-        string += orig_string
+        autoplay = 'autoplay' if params['autoplay'] else ''
+        string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
+        if params['show_text']:
+            string += f'\n\n{original_string}'
 
     return string
 
@@ -180,21 +143,21 @@ def ui():
         v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch')
         v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed')
     with gr.Row():
-        convert = gr.Button('Permanently replace chat history audio with message text')
-        convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
+        convert = gr.Button('Permanently replace audios with the message texts')
         convert_cancel = gr.Button('Cancel', visible=False)
+        convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
 
     # Convert history with confirmation
     convert_arr = [convert_confirm, convert, convert_cancel]
     convert.click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
     convert_confirm.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
-    convert_confirm.click(remove_tts_from_history, [], shared.gradio['display'])
+    convert_confirm.click(remove_tts_from_history, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'])
     convert_confirm.click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False)
     convert_cancel.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
 
     # Toggle message text in history
     show_text.change(lambda x: params.update({"show_text": x}), show_text, None)
-    show_text.change(toggle_text_in_history, [], shared.gradio['display'])
+    show_text.change(toggle_text_in_history, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'])
     show_text.change(lambda : chat.save_history(timestamp=False), [], [], show_progress=False)
 
     # Event functions to update the parameters in the backend
diff --git a/modules/shared.py b/modules/shared.py
index a06c9774..5f6c01f3 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -11,7 +11,6 @@ is_RWKV = False
 history = {'internal': [], 'visible': []}
 character = 'None'
 stop_everything = False
-still_streaming = False
 
 # UI elements (buttons, sliders, HTML, etc)
 gradio = {}
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 7cf68c06..6ee9d931 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -189,7 +189,6 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
         def generate_with_streaming(**kwargs):
             return Iteratorize(generate_with_callback, kwargs, callback=None)
 
-        shared.still_streaming = True
         yield formatted_outputs(original_question, shared.model_name)
         with eval(f"generate_with_streaming({', '.join(generate_params)})") as generator:
             for output in generator:
@@ -204,12 +203,10 @@
                     break
                 yield formatted_outputs(reply, shared.model_name)
 
-            shared.still_streaming = False
             yield formatted_outputs(reply, shared.model_name)
 
     # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria'
     else:
-        shared.still_streaming = True
         for i in range(max_new_tokens//8+1):
             clear_torch_cache()
             with torch.no_grad():
@@ -229,7 +226,6 @@
             if shared.soft_prompt:
                 inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)
 
-        shared.still_streaming = False
         yield formatted_outputs(reply, shared.model_name)
 
     finally:

From b9e0712b92ab81eee50740253798d90ed835a43a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 12 Mar 2023 23:58:25 -0300
Subject: [PATCH 21/33] Fix Open Assistant

---
 modules/text_generation.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/modules/text_generation.py b/modules/text_generation.py
index 6ee9d931..f5d2b8d0 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -37,9 +37,13 @@ def encode(prompt, tokens_to_generate=0, add_special_tokens=True):
     return input_ids.cuda()
 
 def decode(output_ids):
-    reply = shared.tokenizer.decode(output_ids, skip_special_tokens=True)
-    reply = reply.replace(r'<|endoftext|>', '')
-    return reply
+    # Open Assistant relies on special tokens like <|endoftext|>
+    if re.match('oasst-*', shared.model_name.lower()):
+        return shared.tokenizer.decode(output_ids, skip_special_tokens=False)
+    else:
+        reply = shared.tokenizer.decode(output_ids, skip_special_tokens=True)
+        reply = reply.replace(r'<|endoftext|>', '')
+        return reply
 
 def generate_softprompt_input_tensors(input_ids):
     inputs_embeds = shared.model.transformer.wte(input_ids)

From 77294b27ddce0c098a8f51b8cd9bd8c151a506f8 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 00:08:01 -0300
Subject: [PATCH 22/33] Use str(Path) instead of os.path.abspath(Path)

---
 extensions/silero_tts/script.py | 3 +--
 modules/RWKV.py                 | 8 ++++----
 modules/quantized_LLaMA.py      | 5 ++---
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index 4a02abaa..bc660483 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -1,4 +1,3 @@
-import os
 import time
 from pathlib import Path
 
@@ -113,7 +112,7 @@ def output_modifier(string):
         output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav')
         prosody = '<prosody rate="{}" pitch="{}">'.format(params['voice_speed'], params['voice_pitch'])
         silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
-        model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=os.path.abspath(output_file))
+        model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
 
         autoplay = 'autoplay' if params['autoplay'] else ''
         string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
diff --git a/modules/RWKV.py b/modules/RWKV.py
index d97c1706..5cf8937a 100644
--- a/modules/RWKV.py
+++ b/modules/RWKV.py
@@ -25,10 +25,10 @@ class RWKVModel:
         tokenizer_path = Path(f"{path.parent}/20B_tokenizer.json")
 
         if shared.args.rwkv_strategy is None:
-            model = RWKV(model=os.path.abspath(path), strategy=f'{device} {dtype}')
+            model = RWKV(model=str(path), strategy=f'{device} {dtype}')
         else:
-            model = RWKV(model=os.path.abspath(path), strategy=shared.args.rwkv_strategy)
-        pipeline = PIPELINE(model, os.path.abspath(tokenizer_path))
+            model = RWKV(model=str(path), strategy=shared.args.rwkv_strategy)
+        pipeline = PIPELINE(model, str(tokenizer_path))
 
         result = self()
         result.pipeline = pipeline
@@ -61,7 +61,7 @@ class RWKVTokenizer:
     @classmethod
     def from_pretrained(self, path):
         tokenizer_path = path / "20B_tokenizer.json"
-        tokenizer = Tokenizer.from_file(os.path.abspath(tokenizer_path))
+        tokenizer = Tokenizer.from_file(str(tokenizer_path))
 
         result = self()
         result.tokenizer = tokenizer
diff --git a/modules/quantized_LLaMA.py b/modules/quantized_LLaMA.py
index 5e4a38e8..fa7f15c2 100644
--- a/modules/quantized_LLaMA.py
+++ b/modules/quantized_LLaMA.py
@@ -1,4 +1,3 @@
-import os
 import sys
 from pathlib import Path
 
@@ -7,7 +6,7 @@ import torch
 
 import modules.shared as shared
 
-sys.path.insert(0, os.path.abspath(Path("repositories/GPTQ-for-LLaMa")))
+sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa")))
 from llama import load_quant
 
@@ -41,7 +40,7 @@ def load_quantized_LLaMA(model_name):
         print(f"Could not find {pt_model}, exiting...")
         exit()
 
-    model = load_quant(path_to_model, os.path.abspath(pt_path), bits)
+    model = load_quant(path_to_model, str(pt_path), bits)
 
     # Multi-GPU setup
     if shared.args.gpu_memory:

From 0a7acb3bd9217b8d38e35679cb3911aaa07ba864 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 00:12:21 -0300
Subject: [PATCH 23/33] Remove redundant comments

---
 modules/chat.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/modules/chat.py b/modules/chat.py
index 47398afc..d78278c4 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -127,7 +127,6 @@ def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical
         prompt = custom_generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size)
 
     if not regenerate:
-        # Display user input and "*is typing...*" imediately
         yield shared.history['visible']+[[visible_text, '*Is typing...*']]
 
     # Generate
@@ -168,10 +167,8 @@ def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typ
 
     prompt = generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=True)
 
-    # Display "*is typing...*" imediately
-    yield '*Is typing...*'
-
     reply = ''
+    yield '*Is typing...*'
     for i in range(chat_generation_attempts):
         for reply in generate_reply(prompt+reply, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"):
             reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check, impersonate=True)

From 2c4699a7e9a1e611052f6e5635ddb9942b26524a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 00:20:02 -0300
Subject: [PATCH 24/33] Change a comment

---
 modules/quantized_LLaMA.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/quantized_LLaMA.py b/modules/quantized_LLaMA.py
index fa7f15c2..e9352f90 100644
--- a/modules/quantized_LLaMA.py
+++ b/modules/quantized_LLaMA.py
@@ -42,7 +42,7 @@ def load_quantized_LLaMA(model_name):
 
     model = load_quant(path_to_model, str(pt_path), bits)
 
-    # Multi-GPU setup
+    # Multiple GPUs or GPU+CPU
    if shared.args.gpu_memory:
         max_memory = {}
         for i in range(len(shared.args.gpu_memory)):

From 91c2a8e88d4271991f85a61cb8721faba6a34efd Mon Sep 17 00:00:00 2001
From: stefanhamburger <9825318+stefanhamburger@users.noreply.github.com>
Date: Mon, 13 Mar 2023 07:42:09 +0100
Subject: [PATCH 25/33] Fix: tuple object does not support item assignment

---
 extensions/silero_tts/script.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index bc660483..1d068229 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -57,7 +57,7 @@ def remove_surrounded_chars(string):
 
 def remove_tts_from_history(name1, name2):
     for i, entry in enumerate(shared.history['internal']):
-        shared.history['visible'][i][1] = entry[1]
+        shared.history['visible'][i] = [shared.history['visible'][i][0], entry[1]]
     return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
 
 def toggle_text_in_history(name1, name2):
@@ -66,9 +66,9 @@ def toggle_text_in_history(name1, name2):
     for i, entry in enumerate(shared.history['visible']):
         visible_reply = entry[1]
         if visible_reply.startswith('<audio'):
             if params['show_text']:
                 reply = shared.history['internal'][i][1]
-                shared.history['visible'][i][1] = f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"
+                shared.history['visible'][i] = [shared.history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"]
             else:
-                shared.history['visible'][i][1] = f"{visible_reply.split('</audio>')[0]}</audio>"
+                shared.history['visible'][i] = [shared.history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>"]
     return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
 
 def input_modifier(string):
@@ -79,7 +79,7 @@ def input_modifier(string):
 
     # Remove autoplay from the last reply
     if (shared.args.chat or
shared.args.cai_chat) and len(shared.history['internal']) > 0:
-        shared.history['visible'][-1][1] = shared.history['visible'][-1][1].replace('controls autoplay>','controls>')
+        shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')]
 
     return string

From 0c224cf4f4d9c85ecce7aaf00af0e880c46fb7ac Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 10:32:28 -0300
Subject: [PATCH 26/33] Fix GALACTICA (#285)

---
 modules/text_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/text_generation.py b/modules/text_generation.py
index f5d2b8d0..d64481b2 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -123,7 +123,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
     original_input_ids = input_ids
     output = input_ids[0]
     cuda = "" if (shared.args.cpu or shared.args.deepspeed or shared.args.flexgen) else ".cuda()"
-    eos_token_ids = [shared.tokenizer.eos_token_id]
+    eos_token_ids = [shared.tokenizer.eos_token_id] if shared.tokenizer.eos_token_id is not None else []
     if eos_token is not None:
         eos_token_ids.append(int(encode(eos_token)[0][-1]))
     stopping_criteria_list = transformers.StoppingCriteriaList()

From 72757088fa6082676badf987725b27b50628a265 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 10:55:00 -0300
Subject: [PATCH 27/33] Create FUNDING.yml

---
 .github/FUNDING.yml | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .github/FUNDING.yml

diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..57b7f698
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+ko_fi: oobabooga

From bdff37f0bb174d05a17c02beba11ee3c6fc49453 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 11:05:51 -0300
Subject: [PATCH 28/33] Update README.md

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 89b567f2..ec5063b9 100644
--- a/README.md
+++ b/README.md
@@ -60,8 +60,7 @@ pip3 install torch torchvision torchaudio --extra-index-url https://download.pyt
 conda install pytorch torchvision torchaudio git -c pytorch
 ```
 
-See also: [Installation instructions for human beings
-](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings)
+See also: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings).
 ## Installation option 2: one-click installers
 
 [oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip)

From 372363bc3d5383d8351e45ee77323ba686a59769 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 12:07:02 -0300
Subject: [PATCH 29/33] Fix GPTQ load_quant call on Windows

---
 modules/quantized_LLaMA.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/quantized_LLaMA.py b/modules/quantized_LLaMA.py
index e9352f90..a5757c68 100644
--- a/modules/quantized_LLaMA.py
+++ b/modules/quantized_LLaMA.py
@@ -40,7 +40,7 @@ def load_quantized_LLaMA(model_name):
         print(f"Could not find {pt_model}, exiting...")
         exit()
 
-    model = load_quant(path_to_model, str(pt_path), bits)
+    model = load_quant(str(path_to_model), str(pt_path), bits)
 
     # Multiple GPUs or GPU+CPU
     if shared.args.gpu_memory:

From d97bfb871331528aa7217f65299a72baa3e64516 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 12:39:33 -0300
Subject: [PATCH 30/33] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ec5063b9..b6cc6687 100644
--- a/README.md
+++ b/README.md
@@ -157,7 +157,7 @@ Optionally, you can use the following command-line flags:
 | `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. |
 | `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
 | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
-| `--no-stream` | Don't stream the text output in real time. This improves the text generation performance.|
+| `--no-stream` | Don't stream the text output in real time. |
 | `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag.|
 | `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
 | `--listen` | Make the web UI reachable from your local network.|

From ddea518e0fb06ba2bd38b6d9672178ad669bda1f Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 12:43:33 -0300
Subject: [PATCH 31/33] Document --auto-launch

---
 README.md         | 1 +
 modules/shared.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b6cc6687..79a66f14 100644
--- a/README.md
+++ b/README.md
@@ -163,6 +163,7 @@ Optionally, you can use the following command-line flags:
 | `--listen` | Make the web UI reachable from your local network.|
 | `--listen-port LISTEN_PORT` | The listening port that the server will use. |
 | `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. |
+| '--auto-launch' | 'Open the web UI in the default browser upon launch' |
 | `--verbose` | Print the prompts to the terminal. |
 
 Out of memory errors? [Check this guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide).
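On the implementation side, the flag is presumably just forwarded to Gradio's launch call in server.py; a hypothetical sketch of that wiring (not the literal code):

```python
import modules.shared as shared  # assumes the web UI's shared module is importable

# Hypothetical wiring: Gradio's launch() supports inbrowser=True, which opens
# the default browser on startup, so --auto-launch only needs to be passed through.
shared.gradio['interface'].launch(
    server_port=shared.args.listen_port,
    share=shared.args.share,
    inbrowser=shared.args.auto_launch,
)
```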
diff --git a/modules/shared.py b/modules/shared.py
index 5f6c01f3..66b00f93 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -85,12 +85,12 @@ parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory t
 parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')
 parser.add_argument('--rwkv-strategy', type=str, default=None, help='RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8".')
 parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile the CUDA kernel for better performance.')
-parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time. This improves the text generation performance.')
+parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.')
 parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example. If you create a file called settings.json, this file will be loaded by default without the need to use the --settings flag.')
 parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
 parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
 parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
 parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.')
-parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
 parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch')
+parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
 args = parser.parse_args()

From 66b6971b61c7a783be8d5416baf21e896e3e2164 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Mar 2023 12:44:18 -0300
Subject: [PATCH 32/33] Update README

---
 README.md         | 2 +-
 modules/shared.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 79a66f14..dbc8c59c 100644
--- a/README.md
+++ b/README.md
@@ -163,7 +163,7 @@ Optionally, you can use the following command-line flags:
 | `--listen` | Make the web UI reachable from your local network.|
 | `--listen-port LISTEN_PORT` | The listening port that the server will use. |
 | `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. |
-| '--auto-launch' | 'Open the web UI in the default browser upon launch' |
+| `--auto-launch` | Open the web UI in the default browser upon launch. |
 | `--verbose` | Print the prompts to the terminal. |
 
 Out of memory errors? [Check this guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide).
diff --git a/modules/shared.py b/modules/shared.py
index 66b00f93..8fcd4745 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -91,6 +91,6 @@ parser.add_argument('--extensions', type=str, nargs="+", help='The list of exten
 parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
 parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
 parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.')
-parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch')
+parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.')
 parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
 args = parser.parse_args()

From 435a69e357926d2ae10cf9285f73b52971d4b572 Mon Sep 17 00:00:00 2001
From: Luis Cosio
Date: Mon, 13 Mar 2023 11:41:35 -0600
Subject: [PATCH 33/33] Fix for issue #282

RuntimeError: Tensors must have same number of dimensions: got 3 and 4

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b078ecf4..6d0095aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,4 @@ rwkv==0.3.1
 safetensors==0.3.0
 sentencepiece
 tqdm
-git+https://github.com/zphang/transformers@llama_push
+git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176
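Pinning to an exact commit keeps the install reproducible even if the llama_push branch moves again. One way to confirm which commit pip actually installed (an illustrative check, not part of the patch):

```python
# Illustrative check: pip records VCS installs in direct_url.json (PEP 610),
# including the exact commit it checked out.
import importlib.metadata

dist = importlib.metadata.distribution('transformers')
print(dist.version)                       # the installed version string
print(dist.read_text('direct_url.json'))  # should mention commit 68d640f7c36...
```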