From 4c72e43bcfb70102a7330a5748ca2ea0989f0ffb Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Thu, 9 Mar 2023 12:46:50 +0100 Subject: [PATCH 01/89] first implementation --- extensions/whisper_stt/requirements.txt | 5 ++++ extensions/whisper_stt/script.py | 40 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 extensions/whisper_stt/requirements.txt create mode 100644 extensions/whisper_stt/script.py diff --git a/extensions/whisper_stt/requirements.txt b/extensions/whisper_stt/requirements.txt new file mode 100644 index 00000000..e6e3255f --- /dev/null +++ b/extensions/whisper_stt/requirements.txt @@ -0,0 +1,5 @@ +git+https://github.com/Uberi/speech_recognition.git@010382b +PyAudio +openai-whisper +soundfile +ffmpeg diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py new file mode 100644 index 00000000..287a1fdd --- /dev/null +++ b/extensions/whisper_stt/script.py @@ -0,0 +1,40 @@ +import gradio as gr +import speech_recognition as sr +import modules.shared as shared + +input_hijack = { + 'state': False, + 'value': ["", ""] +} + + +def input_modifier(string): + return string + + +def do_stt(): + transcription = "" + r = sr.Recognizer() + with sr.Microphone() as source: + print("Say something!") + r.adjust_for_ambient_noise(source) + audio = r.listen(source) + + # recognize speech using whisper + try: + transcription = r.recognize_whisper(audio, language="english", model="tiny.en") + print("Whisper thinks you said " + transcription) + except sr.UnknownValueError: + print("Whisper could not understand audio") + except sr.RequestError as e: + print("Could not request results from Whisper") + + # input_modifier(transcription) + input_hijack.update({"state": True, "value": [transcription, transcription]}) + return transcription + + +def ui(): + speech_button = gr.Button(value="STT") + output_transcription = gr.Textbox(label="Speech Preview") + speech_button.click(do_stt, outputs=[output_transcription]) From 7a03d0bda357cb781f8675f42caa35a69b79f613 Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Thu, 9 Mar 2023 20:33:00 +0100 Subject: [PATCH 02/89] cleanup --- extensions/whisper_stt/script.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index 287a1fdd..dcb4dfc9 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -1,6 +1,6 @@ import gradio as gr import speech_recognition as sr -import modules.shared as shared + input_hijack = { 'state': False, @@ -16,25 +16,21 @@ def do_stt(): transcription = "" r = sr.Recognizer() with sr.Microphone() as source: - print("Say something!") r.adjust_for_ambient_noise(source) audio = r.listen(source) - # recognize speech using whisper try: transcription = r.recognize_whisper(audio, language="english", model="tiny.en") - print("Whisper thinks you said " + transcription) except sr.UnknownValueError: print("Whisper could not understand audio") except sr.RequestError as e: - print("Could not request results from Whisper") + print("Could not request results from Whisper", e) - # input_modifier(transcription) input_hijack.update({"state": True, "value": [transcription, transcription]}) return transcription def ui(): - speech_button = gr.Button(value="STT") - output_transcription = gr.Textbox(label="Speech Preview") + speech_button = gr.Button(value="🎙️") + output_transcription = gr.Textbox(label="STT-Preview", placeholder="Speech Preview. 
Click \"Generate\" to send") speech_button.click(do_stt, outputs=[output_transcription]) From 00359ba054797d5115c30a292f0f919c514f0046 Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Thu, 9 Mar 2023 21:03:49 +0100 Subject: [PATCH 03/89] interactive preview window --- extensions/whisper_stt/script.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index dcb4dfc9..9f07e5c9 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -1,17 +1,12 @@ import gradio as gr import speech_recognition as sr - input_hijack = { 'state': False, 'value': ["", ""] } -def input_modifier(string): - return string - - def do_stt(): transcription = "" r = sr.Recognizer() @@ -30,7 +25,13 @@ def do_stt(): return transcription +def update_hijack(val): + input_hijack.update({"state": True, "value": [val, val]}) + return val + + def ui(): speech_button = gr.Button(value="🎙️") - output_transcription = gr.Textbox(label="STT-Preview", placeholder="Speech Preview. Click \"Generate\" to send") + output_transcription = gr.Textbox(label="STT-Input", placeholder="Speech Preview. Click \"Generate\" to send", interactive=True) + output_transcription.change(fn=update_hijack, inputs=[output_transcription]) speech_button.click(do_stt, outputs=[output_transcription]) From a24fa781f1627ffc0c15cf56f1eb1b1f8ee26876 Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Thu, 9 Mar 2023 21:18:46 +0100 Subject: [PATCH 04/89] tweaked Whisper parameters --- extensions/whisper_stt/script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index 9f07e5c9..dec1efb0 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -11,11 +11,11 @@ def do_stt(): transcription = "" r = sr.Recognizer() with sr.Microphone() as source: - r.adjust_for_ambient_noise(source) + r.adjust_for_ambient_noise(source, 0.2) audio = r.listen(source) try: - transcription = r.recognize_whisper(audio, language="english", model="tiny.en") + transcription = r.recognize_whisper(audio, language="english", model="base.en") except sr.UnknownValueError: print("Whisper could not understand audio") except sr.RequestError as e: From 1c0bda33fb713ad7d3811300babf606e57253e8d Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Fri, 10 Mar 2023 11:47:16 +0100 Subject: [PATCH 05/89] added installation instructions --- extensions/whisper_stt/README.md | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 extensions/whisper_stt/README.md diff --git a/extensions/whisper_stt/README.md b/extensions/whisper_stt/README.md new file mode 100644 index 00000000..068f5dda --- /dev/null +++ b/extensions/whisper_stt/README.md @@ -0,0 +1,39 @@ +# Installation instructions + +- On all platforms, run `pip install -r requirements.txt` in this folder +- You need **PortAudio** to run the speech recognition. Below are guides for all platforms + + +## Windows + +- You don't need to do anything, `pyaudio` already comes with PortAudio included on Windows. + +## Mac + +```commandline +brew install portaudio +brew link --overwrite portaudio +pip install pyaudio +``` + +## Linux + +- You have to use your distro's package manager to install PortAudio. 
+ +### Ubuntu / Debian / Mint + +```commandline +sudo apt install portaudio19-dev python3-pyaudio +``` + +### Arch / Manjaro + +```commandline +sudo pacman -S portaudio +``` + +### Fedora + +```commandline +sudo dnf -y install portaudio +``` \ No newline at end of file From 683556f4118f2b63ee8049f6fb8a3ecf02121785 Mon Sep 17 00:00:00 2001 From: HideLord Date: Sun, 12 Mar 2023 21:34:09 +0200 Subject: [PATCH 06/89] Adding markdown support and slight refactoring. --- css/html_4chan_style.css | 103 +++++++++++++++++ css/html_chat_style.css | 73 ++++++++++++ modules/html_generator.py | 238 ++++---------------------------------- requirements.txt | 1 + 4 files changed, 198 insertions(+), 217 deletions(-) create mode 100644 css/html_4chan_style.css create mode 100644 css/html_chat_style.css diff --git a/css/html_4chan_style.css b/css/html_4chan_style.css new file mode 100644 index 00000000..843e8a97 --- /dev/null +++ b/css/html_4chan_style.css @@ -0,0 +1,103 @@ +#parent #container { + background-color: #eef2ff; + padding: 17px; +} +#parent #container .reply { + background-color: rgb(214, 218, 240); + border-bottom-color: rgb(183, 197, 217); + border-bottom-style: solid; + border-bottom-width: 1px; + border-image-outset: 0; + border-image-repeat: stretch; + border-image-slice: 100%; + border-image-source: none; + border-image-width: 1; + border-left-color: rgb(0, 0, 0); + border-left-style: none; + border-left-width: 0px; + border-right-color: rgb(183, 197, 217); + border-right-style: solid; + border-right-width: 1px; + border-top-color: rgb(0, 0, 0); + border-top-style: none; + border-top-width: 0px; + color: rgb(0, 0, 0); + display: table; + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + margin-bottom: 4px; + margin-left: 0px; + margin-right: 0px; + margin-top: 4px; + overflow-x: hidden; + overflow-y: hidden; + padding-bottom: 4px; + padding-left: 2px; + padding-right: 2px; + padding-top: 4px; +} + +#parent #container .number { + color: rgb(0, 0, 0); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + width: 342.65px; + margin-right: 7px; +} + +#parent #container .op { + color: rgb(0, 0, 0); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + margin-top: 4px; + overflow-x: hidden; + overflow-y: hidden; +} + +#parent #container .op blockquote { + margin-left: 0px !important; +} + +#parent #container .name { + color: rgb(17, 119, 67); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + font-weight: 700; + margin-left: 7px; +} + +#parent #container .quote { + color: rgb(221, 0, 0); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + text-decoration-color: rgb(221, 0, 0); + text-decoration-line: underline; + text-decoration-style: solid; + text-decoration-thickness: auto; +} + +#parent #container .greentext { + color: rgb(120, 153, 34); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; +} + +#parent #container blockquote { + margin: 0px !important; + margin-block-start: 1em; + margin-block-end: 1em; + margin-inline-start: 40px; + margin-inline-end: 40px; + margin-top: 13.33px !important; + margin-bottom: 13.33px !important; + margin-left: 40px !important; + margin-right: 40px !important; +} + +#parent #container .message { + color: black; + border: none; +} \ No newline at end of file diff --git a/css/html_chat_style.css b/css/html_chat_style.css new file mode 100644 index 00000000..3190b3d1 --- /dev/null +++ 
b/css/html_chat_style.css @@ -0,0 +1,73 @@ +.chat { + margin-left: auto; + margin-right: auto; + max-width: 800px; + height: 66.67vh; + overflow-y: auto; + padding-right: 20px; + display: flex; + flex-direction: column-reverse; +} + +.message { + display: grid; + grid-template-columns: 60px 1fr; + padding-bottom: 25px; + font-size: 15px; + font-family: Helvetica, Arial, sans-serif; + line-height: 1.428571429; +} + +.circle-you { + width: 50px; + height: 50px; + background-color: rgb(238, 78, 59); + border-radius: 50%; +} + +.circle-bot { + width: 50px; + height: 50px; + background-color: rgb(59, 78, 244); + border-radius: 50%; +} + +.circle-bot img, +.circle-you img { + border-radius: 50%; + width: 100%; + height: 100%; + object-fit: cover; +} + +.text {} + +.text p { + margin-top: 5px; +} + +.username { + font-weight: bold; +} + +.message-body {} + +.message-body img { + max-width: 300px; + max-height: 300px; + border-radius: 20px; +} + +.message-body p { + margin-bottom: 0 !important; + font-size: 15px !important; + line-height: 1.428571429 !important; +} + +.dark .message-body p em { + color: rgb(138, 138, 138) !important; +} + +.message-body p em { + color: rgb(110, 110, 110) !important; +} \ No newline at end of file diff --git a/modules/html_generator.py b/modules/html_generator.py index 162040ba..d8aadf43 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -6,6 +6,7 @@ This is a library for formatting GPT-4chan and chat outputs as nice HTML. import os import re +import markdown from pathlib import Path from PIL import Image @@ -48,112 +49,8 @@ def process_post(post, c): return src def generate_4chan_html(f): - css = """ - - #parent #container { - background-color: #eef2ff; - padding: 17px; - } - #parent #container .reply { - background-color: rgb(214, 218, 240); - border-bottom-color: rgb(183, 197, 217); - border-bottom-style: solid; - border-bottom-width: 1px; - border-image-outset: 0; - border-image-repeat: stretch; - border-image-slice: 100%; - border-image-source: none; - border-image-width: 1; - border-left-color: rgb(0, 0, 0); - border-left-style: none; - border-left-width: 0px; - border-right-color: rgb(183, 197, 217); - border-right-style: solid; - border-right-width: 1px; - border-top-color: rgb(0, 0, 0); - border-top-style: none; - border-top-width: 0px; - color: rgb(0, 0, 0); - display: table; - font-family: arial, helvetica, sans-serif; - font-size: 13.3333px; - margin-bottom: 4px; - margin-left: 0px; - margin-right: 0px; - margin-top: 4px; - overflow-x: hidden; - overflow-y: hidden; - padding-bottom: 4px; - padding-left: 2px; - padding-right: 2px; - padding-top: 4px; - } - - #parent #container .number { - color: rgb(0, 0, 0); - font-family: arial, helvetica, sans-serif; - font-size: 13.3333px; - width: 342.65px; - margin-right: 7px; - } - - #parent #container .op { - color: rgb(0, 0, 0); - font-family: arial, helvetica, sans-serif; - font-size: 13.3333px; - margin-bottom: 8px; - margin-left: 0px; - margin-right: 0px; - margin-top: 4px; - overflow-x: hidden; - overflow-y: hidden; - } - - #parent #container .op blockquote { - margin-left: 0px !important; - } - - #parent #container .name { - color: rgb(17, 119, 67); - font-family: arial, helvetica, sans-serif; - font-size: 13.3333px; - font-weight: 700; - margin-left: 7px; - } - - #parent #container .quote { - color: rgb(221, 0, 0); - font-family: arial, helvetica, sans-serif; - font-size: 13.3333px; - text-decoration-color: rgb(221, 0, 0); - text-decoration-line: underline; - text-decoration-style: 
solid; - text-decoration-thickness: auto; - } - - #parent #container .greentext { - color: rgb(120, 153, 34); - font-family: arial, helvetica, sans-serif; - font-size: 13.3333px; - } - - #parent #container blockquote { - margin: 0px !important; - margin-block-start: 1em; - margin-block-end: 1em; - margin-inline-start: 40px; - margin-inline-end: 40px; - margin-top: 13.33px !important; - margin-bottom: 13.33px !important; - margin-left: 40px !important; - margin-right: 40px !important; - } - - #parent #container .message { - color: black; - border: none; - } - """ + with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../css/html_4chan_style.css'), 'r') as f: + css = f.read() posts = [] post = '' @@ -208,135 +105,42 @@ def get_image_cache(path): return image_cache[path][1] +def load_html_image(paths): + for str_path in paths: + path = Path(str_path) + if path.exists(): + return f'' + return '' + def generate_chat_html(history, name1, name2, character): - css = """ - .chat { - margin-left: auto; - margin-right: auto; - max-width: 800px; - height: 66.67vh; - overflow-y: auto; - padding-right: 20px; - display: flex; - flex-direction: column-reverse; - } - - .message { - display: grid; - grid-template-columns: 60px 1fr; - padding-bottom: 25px; - font-size: 15px; - font-family: Helvetica, Arial, sans-serif; - line-height: 1.428571429; - } + with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../css/html_chat_style.css'), 'r') as f: + css = f.read() - .circle-you { - width: 50px; - height: 50px; - background-color: rgb(238, 78, 59); - border-radius: 50%; - } - - .circle-bot { - width: 50px; - height: 50px; - background-color: rgb(59, 78, 244); - border-radius: 50%; - } - - .circle-bot img, .circle-you img { - border-radius: 50%; - width: 100%; - height: 100%; - object-fit: cover; - } - - .text { - } - - .text p { - margin-top: 5px; - } - - .username { - font-weight: bold; - } - - .message-body { - } - - .message-body img { - max-width: 300px; - max-height: 300px; - border-radius: 20px; - } - - .message-body p { - margin-bottom: 0 !important; - font-size: 15px !important; - line-height: 1.428571429 !important; - } - - .dark .message-body p em { - color: rgb(138, 138, 138) !important; - } - - .message-body p em { - color: rgb(110, 110, 110) !important; - } - - """ - - output = '' - output += f'
<div class="chat" id="chat">'
-    img = ''
-
-    for i in [
-            f"characters/{character}.png",
-            f"characters/{character}.jpg",
-            f"characters/{character}.jpeg",
-            "img_bot.png",
-            "img_bot.jpg",
-            "img_bot.jpeg"
-            ]:
-
-        path = Path(i)
-        if path.exists():
-            img = f'<img src="file/{i}">'
-            break
-
-    img_me = ''
-    for i in ["img_me.png", "img_me.jpg", "img_me.jpeg"]:
-        path = Path(i)
-        if path.exists():
-            img_me = f'<img src="file/{i}">'
-            break
+    output = f'<div class="chat" id="chat">'
+
+    img_bot = load_html_image([f"characters/{character}.{ext}" for ext in ['png', 'jpg', 'jpeg']] + ["img_bot.png","img_bot.jpg","img_bot.jpeg"])
+    img_me = load_html_image(["img_me.png", "img_me.jpg", "img_me.jpeg"])
 
     for i,_row in enumerate(history[::-1]):
-        row = _row.copy()
-        row[0] = re.sub(r"(\*\*)([^\*\n]*)(\*\*)", r"<b>\2</b>", row[0])
-        row[1] = re.sub(r"(\*\*)([^\*\n]*)(\*\*)", r"<b>\2</b>", row[1])
-        row[0] = re.sub(r"(\*)([^\*\n]*)(\*)", r"<em>\2</em>", row[0])
-        row[1] = re.sub(r"(\*)([^\*\n]*)(\*)", r"<em>\2</em>", row[1])
-        p = '\n'.join([f"<p>{x}</p>" for x in row[1].split('\n')])
+        row = [markdown.markdown(re.sub(r"(.)```", r"\1\n```", entry), extensions=['fenced_code']) for entry in _row]
+
         output += f"""
               <div class="message">
                 <div class="circle-bot">
-                  {img}
+                  {img_bot}
                 </div>
                 <div class="text">
                   <div class="username">
                     {name2}
                   </div>
                   <div class="message-body">
-                    {p}
+                    {row[1]}
                   </div>
                 </div>
               </div>
             """
 
         if not (i == len(history)-1 and len(row[0]) == 0):
-            p = '\n'.join([f"<p>{x}</p>" for x in row[0].split('\n')])
             output += f"""
               <div class="message">
                 <div class="circle-you">
                   {img_me}
                 </div>
                 <div class="text">
                   <div class="username">
                     {name1}
                   </div>
                   <div class="message-body">
-                    {p}
+                    {row[0]}
                   </div>
diff --git a/requirements.txt b/requirements.txt index ceaa0b70..9debe930 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ rwkv==0.1.0 safetensors==0.2.8 sentencepiece tqdm +markdown git+https://github.com/zphang/transformers@llama_push From 48aa52849bb938e62fdada0f4cebcd049e8536ec Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Sun, 12 Mar 2023 21:03:07 +0100 Subject: [PATCH 07/89] use Gradio microphone input instead --- extensions/whisper_stt/README.md | 39 ------------------------- extensions/whisper_stt/requirements.txt | 1 - extensions/whisper_stt/script.py | 27 ++++++++++------- 3 files changed, 17 insertions(+), 50 deletions(-) delete mode 100644 extensions/whisper_stt/README.md diff --git a/extensions/whisper_stt/README.md b/extensions/whisper_stt/README.md deleted file mode 100644 index 068f5dda..00000000 --- a/extensions/whisper_stt/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Installation instructions - -- On all platforms, run `pip install -r requirements.txt` in this folder -- You need **PortAudio** to run the speech recognition. Below are guides for all platforms - - -## Windows - -- You don't need to do anything, `pyaudio` already comes with PortAudio included on Windows. - -## Mac - -```commandline -brew install portaudio -brew link --overwrite portaudio -pip install pyaudio -``` - -## Linux - -- You have to use your distro's package manager to install PortAudio. - -### Ubuntu / Debian / Mint - -```commandline -sudo apt install portaudio19-dev python3-pyaudio -``` - -### Arch / Manjaro - -```commandline -sudo pacman -S portaudio -``` - -### Fedora - -```commandline -sudo dnf -y install portaudio -``` \ No newline at end of file diff --git a/extensions/whisper_stt/requirements.txt b/extensions/whisper_stt/requirements.txt index e6e3255f..770c38bb 100644 --- a/extensions/whisper_stt/requirements.txt +++ b/extensions/whisper_stt/requirements.txt @@ -1,5 +1,4 @@ git+https://github.com/Uberi/speech_recognition.git@010382b -PyAudio openai-whisper soundfile ffmpeg diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index dec1efb0..b2e840a8 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -7,22 +7,24 @@ input_hijack = { } -def do_stt(): +def do_stt(audio, text_state=""): transcription = "" r = sr.Recognizer() - with sr.Microphone() as source: - r.adjust_for_ambient_noise(source, 0.2) - audio = r.listen(source) + + # Convert to AudioData + audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4) try: - transcription = r.recognize_whisper(audio, language="english", model="base.en") + transcription = r.recognize_whisper(audio_data, language="english", model="base.en") except sr.UnknownValueError: print("Whisper could not understand audio") except sr.RequestError as e: print("Could not request results from Whisper", e) input_hijack.update({"state": True, "value": [transcription, transcription]}) - return transcription + + text_state += transcription + " " + return text_state, text_state def update_hijack(val): @@ -31,7 +33,12 @@ def update_hijack(val): def ui(): - speech_button = gr.Button(value="🎙️") - output_transcription = gr.Textbox(label="STT-Input", placeholder="Speech Preview. 
Click \"Generate\" to send", interactive=True) - output_transcription.change(fn=update_hijack, inputs=[output_transcription]) - speech_button.click(do_stt, outputs=[output_transcription]) + tr_state = gr.State(value="") + output_transcription = gr.Textbox(label="STT-Input", + placeholder="Speech Preview. Click \"Generate\" to send", + interactive=True) + output_transcription.change(fn=update_hijack, inputs=[output_transcription], outputs=[tr_state]) + with gr.Row(): + audio = gr.Audio(source="microphone") + transcribe_button = gr.Button(value="Transcribe") + transcribe_button.click(do_stt, inputs=[audio, tr_state], outputs=[output_transcription, tr_state]) From 02e1113d955832990cc97a0c00315753e7100837 Mon Sep 17 00:00:00 2001 From: EliasVincent Date: Mon, 13 Mar 2023 21:41:19 +0100 Subject: [PATCH 08/89] add auto-transcribe option --- extensions/whisper_stt/script.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index b2e840a8..6ef60c57 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -32,13 +32,23 @@ def update_hijack(val): return val +def auto_transcribe(audio, audio_auto, text_state=""): + if audio is None: + return "", "" + if audio_auto: + return do_stt(audio, text_state) + return "", "" + + def ui(): tr_state = gr.State(value="") output_transcription = gr.Textbox(label="STT-Input", placeholder="Speech Preview. Click \"Generate\" to send", interactive=True) output_transcription.change(fn=update_hijack, inputs=[output_transcription], outputs=[tr_state]) + audio_auto = gr.Checkbox(label="Auto-Transcribe", value=True) with gr.Row(): audio = gr.Audio(source="microphone") + audio.change(fn=auto_transcribe, inputs=[audio, audio_auto, tr_state], outputs=[output_transcription, tr_state]) transcribe_button = gr.Button(value="Transcribe") transcribe_button.click(do_stt, inputs=[audio, tr_state], outputs=[output_transcription, tr_state]) From f0f325eac120b88665eccd7b5e62efd097b278a0 Mon Sep 17 00:00:00 2001 From: Forkoz <59298527+Ph0rk0z@users.noreply.github.com> Date: Tue, 14 Mar 2023 21:21:47 +0000 Subject: [PATCH 09/89] Remove Json from loading no more 20b tokenizer --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index a54e3b62..8a6a97b8 100644 --- a/server.py +++ b/server.py @@ -34,7 +34,7 @@ def get_available_models(): if shared.args.flexgen: return sorted([re.sub('-np$', '', item.name) for item in list(Path('models/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([item.name for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt'))], key=str.lower) + return sorted([item.name for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From 3b62bd180d4d1e3dabecd7724ff3adbc9195dc57 Mon Sep 17 00:00:00 2001 From: Forkoz <59298527+Ph0rk0z@users.noreply.github.com> Date: Tue, 14 Mar 2023 21:23:39 +0000 Subject: [PATCH 10/89] Remove PTH extension from RWKV When loading the current model was blank unless you typed it out. 
--- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 8a6a97b8..e4c7b8fa 100644 --- a/server.py +++ b/server.py @@ -34,7 +34,7 @@ def get_available_models(): if shared.args.flexgen: return sorted([re.sub('-np$', '', item.name) for item in list(Path('models/').glob('*')) if item.name.endswith('-np')], key=str.lower) else: - return sorted([item.name for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + return sorted([re.sub('.pth$', '', item.name) for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) From 9d6a625bd6f2e7939ef2fa6dc4576c5744a84d83 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 11:04:30 -0300 Subject: [PATCH 11/89] Add 'hallucinations' filter #326 This breaks the API since a new parameter has been added. It should be a one-line fix. See api-example.py. --- api-example-stream.py | 2 ++ api-example.py | 2 ++ modules/chat.py | 16 ++++++++-------- modules/text_generation.py | 3 ++- server.py | 20 +++++++++++--------- 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/api-example-stream.py b/api-example-stream.py index a5ed4202..add1df41 100644 --- a/api-example-stream.py +++ b/api-example-stream.py @@ -26,6 +26,7 @@ async def run(context): 'top_p': 0.9, 'typical_p': 1, 'repetition_penalty': 1.05, + 'encoder_repetition_penalty': 1.0, 'top_k': 0, 'min_length': 0, 'no_repeat_ngram_size': 0, @@ -59,6 +60,7 @@ async def run(context): params['top_p'], params['typical_p'], params['repetition_penalty'], + params['encoder_repetition_penalty'], params['top_k'], params['min_length'], params['no_repeat_ngram_size'], diff --git a/api-example.py b/api-example.py index 0306b7ab..a6f0c10e 100644 --- a/api-example.py +++ b/api-example.py @@ -24,6 +24,7 @@ params = { 'top_p': 0.9, 'typical_p': 1, 'repetition_penalty': 1.05, + 'encoder_repetition_penalty': 1.0, 'top_k': 0, 'min_length': 0, 'no_repeat_ngram_size': 0, @@ -45,6 +46,7 @@ response = requests.post(f"http://{server}:7860/run/textgen", json={ params['top_p'], params['typical_p'], params['repetition_penalty'], + params['encoder_repetition_penalty'], params['top_k'], params['min_length'], params['no_repeat_ngram_size'], diff --git a/modules/chat.py b/modules/chat.py index bd45b879..d7202bee 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -97,7 +97,7 @@ def extract_message_from_reply(question, reply, name1, name2, check, impersonate def stop_everything_event(): shared.stop_everything = True -def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False): +def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False): shared.stop_everything = False just_started = True eos_token = '\n' if check else None @@ -133,7 +133,7 @@ def 
chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical # Generate reply = '' for i in range(chat_generation_attempts): - for reply in generate_reply(f"{prompt}{' ' if len(reply) > 0 else ''}{reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name1}:"): + for reply in generate_reply(f"{prompt}{' ' if len(reply) > 0 else ''}{reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name1}:"): # Extracting the reply reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check) @@ -160,7 +160,7 @@ def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical yield shared.history['visible'] -def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): +def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): eos_token = '\n' if check else None if 'pygmalion' in shared.model_name.lower(): @@ -172,18 +172,18 @@ def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typ # Yield *Is typing...* yield shared.processing_message for i in range(chat_generation_attempts): - for reply in generate_reply(prompt+reply, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"): + for reply in generate_reply(prompt+reply, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"): reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check, impersonate=True) yield reply if next_character_found: break yield reply -def cai_chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): - for _history in chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts): +def cai_chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): + 
for _history in chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts): yield generate_chat_html(_history, name1, name2, shared.character) -def regenerate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): +def regenerate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): if (shared.character != 'None' and len(shared.history['visible']) == 1) or len(shared.history['internal']) == 0: yield generate_chat_output(shared.history['visible'], name1, name2, shared.character) else: @@ -191,7 +191,7 @@ def regenerate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typi last_internal = shared.history['internal'].pop() # Yield '*Is typing...*' yield generate_chat_output(shared.history['visible']+[[last_visible[0], shared.processing_message]], name1, name2, shared.character) - for _history in chatbot_wrapper(last_internal[0], max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts, regenerate=True): + for _history in chatbot_wrapper(last_internal[0], max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts, regenerate=True): if shared.args.cai_chat: shared.history['visible'][-1] = [last_visible[0], _history[-1][1]] else: diff --git a/modules/text_generation.py b/modules/text_generation.py index 70a51d91..f302a918 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -89,7 +89,7 @@ def clear_torch_cache(): if not shared.args.cpu: torch.cuda.empty_cache() -def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None): +def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None): clear_torch_cache() t0 = time.time() @@ -143,6 +143,7 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi "top_p": top_p, "typical_p": typical_p, "repetition_penalty": repetition_penalty, + "encoder_repetition_penalty": encoder_repetition_penalty, "top_k": top_k, "min_length": min_length if shared.args.no_stream else 0, "no_repeat_ngram_size": no_repeat_ngram_size, diff --git a/server.py b/server.py index a54e3b62..4ac81f01 100644 --- a/server.py +++ b/server.py @@ -66,6 +66,7 
@@ def load_preset_values(preset_menu, return_dict=False): 'top_p': 1, 'typical_p': 1, 'repetition_penalty': 1, + 'encoder_repetition_penalty': 1, 'top_k': 50, 'num_beams': 1, 'penalty_alpha': 0, @@ -86,7 +87,7 @@ def load_preset_values(preset_menu, return_dict=False): if return_dict: return generate_params else: - return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] + return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] def upload_soft_prompt(file): with zipfile.ZipFile(io.BytesIO(file)) as zf: @@ -117,14 +118,15 @@ def create_settings_menus(default_preset): with gr.Row(): with gr.Column(): shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') - shared.gradio['repetition_penalty'] = gr.Slider(1.0, 2.99, value=generate_params['repetition_penalty'],step=0.01,label='repetition_penalty') - shared.gradio['top_k'] = gr.Slider(0,200,value=generate_params['top_k'],step=1,label='top_k') shared.gradio['top_p'] = gr.Slider(0.0,1.0,value=generate_params['top_p'],step=0.01,label='top_p') - with gr.Column(): - shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + shared.gradio['top_k'] = gr.Slider(0,200,value=generate_params['top_k'],step=1,label='top_k') shared.gradio['typical_p'] = gr.Slider(0.0,1.0,value=generate_params['typical_p'],step=0.01,label='typical_p') + with gr.Column(): + shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'],step=0.01,label='repetition_penalty') + shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'],step=0.01,label='encoder_repetition_penalty') shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'] if shared.args.no_stream else 0, label='min_length', interactive=shared.args.no_stream) + shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') gr.Markdown('Contrastive search:') shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha') @@ -147,7 +149,7 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio['do_sample'], shared.gradio['temperature'], shared.gradio['top_p'], shared.gradio['typical_p'], shared.gradio['repetition_penalty'], shared.gradio['top_k'], 
shared.gradio['min_length'], shared.gradio['no_repeat_ngram_size'], shared.gradio['num_beams'], shared.gradio['penalty_alpha'], shared.gradio['length_penalty'], shared.gradio['early_stopping']]) + shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio['do_sample'], shared.gradio['temperature'], shared.gradio['top_p'], shared.gradio['typical_p'], shared.gradio['repetition_penalty'], shared.gradio['encoder_repetition_penalty'], shared.gradio['top_k'], shared.gradio['min_length'], shared.gradio['no_repeat_ngram_size'], shared.gradio['num_beams'], shared.gradio['penalty_alpha'], shared.gradio['length_penalty'], shared.gradio['early_stopping']]) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -262,7 +264,7 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') create_settings_menus(default_preset) - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] if shared.args.extensions is not None: with gr.Tab('Extensions'): extensions_module.create_extensions_block() @@ -329,7 +331,7 @@ elif shared.args.notebook: if shared.args.extensions is not None: extensions_module.create_extensions_block() - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) @@ -361,7 +363,7 @@ else: with gr.Tab('HTML'): shared.gradio['html'] = gr.HTML() - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'top_k', 'min_length', 
'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) From 14139317055f2dd947e54472cc81d69956f25fe3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 12:01:32 -0300 Subject: [PATCH 12/89] Add a header bar and redesign the interface (#293) --- extensions/gallery/script.py | 2 +- modules/ui.py | 9 ++ server.py | 159 ++++++++++++++++++++--------------- 3 files changed, 99 insertions(+), 71 deletions(-) diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 8a2d7cf9..fbf23bc9 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -76,7 +76,7 @@ def generate_html(): return container_html def ui(): - with gr.Accordion("Character gallery"): + with gr.Accordion("Character gallery", open=False): update = gr.Button("Refresh") gallery = gr.HTML(value=generate_html()) update.click(generate_html, [], gallery) diff --git a/modules/ui.py b/modules/ui.py index bb193e35..27233153 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -38,6 +38,9 @@ svg { ol li p, ul li p { display: inline-block; } +#main, #settings, #extensions, #chat-settings { + border: 0; +} """ chat_css = """ @@ -64,6 +67,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } """ +page_js = """ +document.getElementById("main").parentNode.childNodes[0].style = "border: none; background-color: #8080802b; margin-bottom: 40px" +document.getElementById("main").parentNode.style = "padding: 0; margin: 0" +document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0" +""" + class ToolButton(gr.Button, gr.components.FormComponent): """Small button with single emoji as text, fits inside gradio forms""" diff --git a/server.py b/server.py index 4ac81f01..a7ec4888 100644 --- a/server.py +++ b/server.py @@ -101,9 +101,7 @@ def upload_soft_prompt(file): return name -def create_settings_menus(default_preset): - generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) - +def create_model_and_preset_menus(): with gr.Row(): with gr.Column(): with gr.Row(): @@ -114,7 +112,11 @@ def create_settings_menus(default_preset): shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') - with gr.Accordion('Custom generation parameters', open=False, elem_id='accordion'): +def create_settings_menus(default_preset): + generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) + + with gr.Box(): + gr.Markdown('Custom generation parameters') with gr.Row(): with gr.Column(): 
shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') @@ -128,9 +130,11 @@ def create_settings_menus(default_preset): shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'] if shared.args.no_stream else 0, label='min_length', interactive=shared.args.no_stream) shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + with gr.Box(): gr.Markdown('Contrastive search:') shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha') + with gr.Box(): gr.Markdown('Beam search (uses a lot of VRAM):') with gr.Row(): with gr.Column(): @@ -139,7 +143,8 @@ def create_settings_menus(default_preset): shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - with gr.Accordion('Soft prompt', open=False, elem_id='accordion'): + with gr.Box(): + gr.Markdown('Soft prompt') with gr.Row(): shared.gradio['softprompts_menu'] = gr.Dropdown(choices=available_softprompts, value='None', label='Soft prompt') ui.create_refresh_button(shared.gradio['softprompts_menu'], lambda : None, lambda : {'choices': get_available_softprompts()}, 'refresh-button') @@ -202,26 +207,41 @@ suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' if shared.args.chat or shared.args.cai_chat: with gr.Blocks(css=ui.css+ui.chat_css, analytics_enabled=False, title=title) as shared.gradio['interface']: - if shared.args.cai_chat: - shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) - else: - shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) - shared.gradio['textbox'] = gr.Textbox(label='Input') - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') - shared.gradio['Generate'] = gr.Button('Generate') - with gr.Row(): - shared.gradio['Impersonate'] = gr.Button('Impersonate') - shared.gradio['Regenerate'] = gr.Button('Regenerate') - with gr.Row(): - shared.gradio['Copy last reply'] = gr.Button('Copy last reply') - shared.gradio['Replace last reply'] = gr.Button('Replace last reply') - shared.gradio['Remove last'] = gr.Button('Remove last') + with gr.Tab("Text generation", elem_id="main"): + if shared.args.cai_chat: + shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) + else: + shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) + shared.gradio['textbox'] = gr.Textbox(label='Input') + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Row(): + shared.gradio['Impersonate'] = gr.Button('Impersonate') + shared.gradio['Regenerate'] = gr.Button('Regenerate') + with gr.Row(): + shared.gradio['Copy last reply'] = gr.Button('Copy last reply') + shared.gradio['Replace last reply'] = gr.Button('Replace last reply') + shared.gradio['Remove last'] = gr.Button('Remove last') - shared.gradio['Clear history'] = gr.Button('Clear history') - shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) - 
shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) - with gr.Tab('Chat settings'): + shared.gradio['Clear history'] = gr.Button('Clear history') + shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) + shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) + + create_model_and_preset_menus() + + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) + with gr.Column(): + shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') + + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + + with gr.Tab("Chat settings", elem_id="chat-settings"): shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') @@ -255,21 +275,11 @@ if shared.args.chat or shared.args.cai_chat: with gr.Tab('Upload TavernAI Character Card'): shared.gradio['upload_img_tavern'] = gr.File(type='binary', file_types=['image']) - with gr.Tab('Generation settings'): - with gr.Row(): - with gr.Column(): - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - with gr.Column(): - shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) - shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') + with gr.Tab("Settings", elem_id="settings"): create_settings_menus(default_preset) - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] - if shared.args.extensions is not None: - with gr.Tab('Extensions'): - extensions_module.create_extensions_block() - function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 
'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] gen_events.append(shared.gradio['Generate'].click(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) @@ -310,58 +320,66 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['upload_img_me'].upload(reload_func, reload_inputs, [shared.gradio['display']]) shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.page_js}}}") shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) elif shared.args.notebook: with gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: - gr.Markdown(description) - with gr.Tab('Raw'): - shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=23) - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() + with gr.Tab("Text generation", elem_id="main"): + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['Stop'] = gr.Button('Stop') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - create_settings_menus(default_preset) - if shared.args.extensions is not None: - extensions_module.create_extensions_block() + create_model_and_preset_menus() + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + + with gr.Tab("Settings", elem_id="settings"): + create_settings_menus(default_preset) shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.page_js}}}") else: with 
gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: - gr.Markdown(description) - with gr.Row(): - with gr.Column(): - shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=15, label='Input') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - shared.gradio['Generate'] = gr.Button('Generate') - with gr.Row(): - with gr.Column(): - shared.gradio['Continue'] = gr.Button('Continue') - with gr.Column(): - shared.gradio['Stop'] = gr.Button('Stop') + with gr.Tab("Text generation", elem_id="main"): + with gr.Row(): + with gr.Column(): + shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=15, label='Input') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Row(): + with gr.Column(): + shared.gradio['Continue'] = gr.Button('Continue') + with gr.Column(): + shared.gradio['Stop'] = gr.Button('Stop') - create_settings_menus(default_preset) - if shared.args.extensions is not None: - extensions_module.create_extensions_block() + create_model_and_preset_menus() + if shared.args.extensions is not None: + extensions_module.create_extensions_block() - with gr.Column(): - with gr.Tab('Raw'): - shared.gradio['output_textbox'] = gr.Textbox(lines=15, label='Output') - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() + with gr.Column(): + with gr.Tab('Raw'): + shared.gradio['output_textbox'] = gr.Textbox(lines=25, label='Output') + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + with gr.Tab("Settings", elem_id="settings"): + create_settings_menus(default_preset) shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] @@ -369,6 +387,7 @@ else: gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.page_js}}}") shared.gradio['interface'].queue() if shared.args.listen: From ec972b85d1be8b4d756e5416bfee90449cf57381 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 12:33:26 -0300 Subject: [PATCH 13/89] Move all css/js into separate files --- css/chat.css | 22 +++++++++++ css/html_readable_style.css | 14 +++++++ css/main.css | 39 +++++++++++++++++++ css/main.js | 3 ++ modules/html_generator.py | 24 +++--------- modules/ui.py | 78 ++++--------------------------------- 6 files changed, 92 insertions(+), 88 deletions(-) create mode 100644 css/chat.css 
create mode 100644 css/html_readable_style.css create mode 100644 css/main.css create mode 100644 css/main.js diff --git a/css/chat.css b/css/chat.css new file mode 100644 index 00000000..b1229830 --- /dev/null +++ b/css/chat.css @@ -0,0 +1,22 @@ +.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { + height: 66.67vh +} +.gradio-container { + max-width: 800px !important; + margin-left: auto !important; + margin-right: auto !important; +} +.w-screen { + width: unset +} +div.svelte-362y77>*, div.svelte-362y77>.form>* { + flex-wrap: nowrap +} +/* fixes the API documentation in chat mode */ +.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h { + display: grid; +} +.pending.svelte-1ed2p3z { + opacity: 1; +} + diff --git a/css/html_readable_style.css b/css/html_readable_style.css new file mode 100644 index 00000000..d3f580a5 --- /dev/null +++ b/css/html_readable_style.css @@ -0,0 +1,14 @@ +.container { + max-width: 600px; + margin-left: auto; + margin-right: auto; + background-color: rgb(31, 41, 55); + padding:3em; +} + +.container p { + font-size: 16px !important; + color: white !important; + margin-bottom: 22px; + line-height: 1.4 !important; +} diff --git a/css/main.css b/css/main.css new file mode 100644 index 00000000..8519f1c9 --- /dev/null +++ b/css/main.css @@ -0,0 +1,39 @@ +.tabs.svelte-710i53 { + margin-top: 0 +} +.py-6 { + padding-top: 2.5rem +} +.dark #refresh-button { + background-color: #ffffff1f; +} +#refresh-button { + flex: none; + margin: 0; + padding: 0; + min-width: 50px; + border: none; + box-shadow: none; + border-radius: 10px; + background-color: #0000000d; +} +#download-label, #upload-label { + min-height: 0 +} +#accordion { +} +.dark svg { + fill: white; +} +svg { + display: unset !important; + vertical-align: middle !important; + margin: 5px; +} +ol li p, ul li p { + display: inline-block; +} +#main, #settings, #extensions, #chat-settings { + border: 0; +} + diff --git a/css/main.js b/css/main.js new file mode 100644 index 00000000..7d9cf9a2 --- /dev/null +++ b/css/main.js @@ -0,0 +1,3 @@ +document.getElementById("main").parentNode.childNodes[0].style = "border: none; background-color: #8080802b; margin-bottom: 40px" +document.getElementById("main").parentNode.style = "padding: 0; margin: 0" +document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0" diff --git a/modules/html_generator.py b/modules/html_generator.py index d8aadf43..71a638cb 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -6,30 +6,18 @@ This is a library for formatting GPT-4chan and chat outputs as nice HTML. import os import re -import markdown from pathlib import Path +import markdown from PIL import Image # This is to store the paths to the thumbnails of the profile pictures image_cache = {} def generate_basic_html(s): - css = """ - .container { - max-width: 600px; - margin-left: auto; - margin-right: auto; - background-color: rgb(31, 41, 55); - padding:3em; - } - .container p { - font-size: 16px !important; - color: white !important; - margin-bottom: 22px; - line-height: 1.4 !important; - } - """ + with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f: + css = f.read() + s = '\n'.join([f'

<p>{line}</p>' for line in s.split('\n')]) s = f'<style>{css}</style><div class="container">{s}</div>
' return s @@ -49,7 +37,7 @@ def process_post(post, c): return src def generate_4chan_html(f): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../css/html_4chan_style.css'), 'r') as f: + with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as f: css = f.read() posts = [] @@ -113,7 +101,7 @@ def load_html_image(paths): return '' def generate_chat_html(history, name1, name2, character): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../css/html_chat_style.css'), 'r') as f: + with open(Path(__file__).resolve().parent / '../css/html_chat_style.css', 'r') as f: css = f.read() output = f'
' diff --git a/modules/ui.py b/modules/ui.py index 27233153..98942559 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1,77 +1,15 @@ +from pathlib import Path + import gradio as gr refresh_symbol = '\U0001f504' # 🔄 -css = """ -.tabs.svelte-710i53 { - margin-top: 0 -} -.py-6 { - padding-top: 2.5rem -} -.dark #refresh-button { - background-color: #ffffff1f; -} -#refresh-button { - flex: none; - margin: 0; - padding: 0; - min-width: 50px; - border: none; - box-shadow: none; - border-radius: 10px; - background-color: #0000000d; -} -#download-label, #upload-label { - min-height: 0 -} -#accordion { -} -.dark svg { - fill: white; -} -svg { - display: unset !important; - vertical-align: middle !important; - margin: 5px; -} -ol li p, ul li p { - display: inline-block; -} -#main, #settings, #extensions, #chat-settings { - border: 0; -} -""" - -chat_css = """ -.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { - height: 66.67vh -} -.gradio-container { - max-width: 800px !important; - margin-left: auto !important; - margin-right: auto !important; -} -.w-screen { - width: unset -} -div.svelte-362y77>*, div.svelte-362y77>.form>* { - flex-wrap: nowrap -} -/* fixes the API documentation in chat mode */ -.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h { - display: grid; -} -.pending.svelte-1ed2p3z { - opacity: 1; -} -""" - -page_js = """ -document.getElementById("main").parentNode.childNodes[0].style = "border: none; background-color: #8080802b; margin-bottom: 40px" -document.getElementById("main").parentNode.style = "padding: 0; margin: 0" -document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0" -""" +with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f: + css = f.read() +with open(Path(__file__).resolve().parent / '../css/chat.css', 'r') as f: + chat_css = f.read() +with open(Path(__file__).resolve().parent / '../css/main.js', 'r') as f: + page_js = f.read() class ToolButton(gr.Button, gr.components.FormComponent): """Small button with single emoji as text, fits inside gradio forms""" From 29b7c5ac0c87bdd3b50416791161590cc9156c09 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 12:40:03 -0300 Subject: [PATCH 14/89] Sort the requirements --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index cbfe4e60..4994ca1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,11 @@ accelerate==0.17.1 bitsandbytes==0.37.1 flexgen==0.1.7 gradio==3.18.0 +markdown numpy requests rwkv==0.4.2 safetensors==0.3.0 sentencepiece tqdm -markdown -git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176 \ No newline at end of file +git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176 From cf2da8635223dacba1780a66a92ea864c898f80c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 12:51:13 -0300 Subject: [PATCH 15/89] Prevent *Is typing* from disappearing instantly while streaming --- modules/text_generation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index f302a918..a29b987f 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -101,7 +101,8 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi reply = shared.model.generate(context=question, 
token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k) yield formatted_outputs(reply, shared.model_name) else: - yield formatted_outputs(question, shared.model_name) + if not (shared.args.chat or shared.args.cai_chat): + yield formatted_outputs(question, shared.model_name) # RWKV has proper streaming, which is very nice. # No need to generate 8 tokens at a time. for reply in shared.model.generate_with_streaming(context=question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k): @@ -197,7 +198,8 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi def generate_with_streaming(**kwargs): return Iteratorize(generate_with_callback, kwargs, callback=None) - yield formatted_outputs(original_question, shared.model_name) + if not (shared.args.chat or shared.args.cai_chat): + yield formatted_outputs(original_question, shared.model_name) with generate_with_streaming(**generate_params) as generator: for output in generator: if shared.soft_prompt: From d30a14087f3b28862b13dcf259deb8497cf6e2aa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 13:24:54 -0300 Subject: [PATCH 16/89] Further reorganize the UI --- css/chat.css | 7 ++++-- css/main.js | 6 ++--- modules/ui.py | 4 +++- server.py | 63 ++++++++++++++++++++++++++------------------------- 4 files changed, 43 insertions(+), 37 deletions(-) diff --git a/css/chat.css b/css/chat.css index b1229830..8d9d88a6 100644 --- a/css/chat.css +++ b/css/chat.css @@ -1,22 +1,25 @@ .h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { height: 66.67vh } + .gradio-container { - max-width: 800px !important; margin-left: auto !important; margin-right: auto !important; } + .w-screen { width: unset } + div.svelte-362y77>*, div.svelte-362y77>.form>* { flex-wrap: nowrap } + /* fixes the API documentation in chat mode */ .api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h { display: grid; } + .pending.svelte-1ed2p3z { opacity: 1; } - diff --git a/css/main.js b/css/main.js index 7d9cf9a2..ad4b5ec0 100644 --- a/css/main.js +++ b/css/main.js @@ -1,3 +1,3 @@ -document.getElementById("main").parentNode.childNodes[0].style = "border: none; background-color: #8080802b; margin-bottom: 40px" -document.getElementById("main").parentNode.style = "padding: 0; margin: 0" -document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0" +document.getElementById("main").parentNode.childNodes[0].style = "border: none; background-color: #8080802b; margin-bottom: 40px"; +document.getElementById("main").parentNode.style = "padding: 0; margin: 0"; +document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0"; diff --git a/modules/ui.py b/modules/ui.py index 98942559..80bd7c1c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -9,7 +9,9 @@ with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f: with open(Path(__file__).resolve().parent / '../css/chat.css', 'r') as f: chat_css = f.read() with open(Path(__file__).resolve().parent / '../css/main.js', 'r') as f: - page_js = f.read() + main_js = f.read() +with open(Path(__file__).resolve().parent / '../css/chat.js', 'r') as f: + chat_js = f.read() class ToolButton(gr.Button, gr.components.FormComponent): """Small button with single emoji as text, fits inside gradio forms""" diff --git a/server.py b/server.py index fd156087..7d807ca2 100644 --- a/server.py +++ b/server.py @@ -115,36 +115,37 @@ def create_model_and_preset_menus(): def 
create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) - with gr.Box(): - gr.Markdown('Custom generation parameters') - with gr.Row(): - with gr.Column(): - shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') - shared.gradio['top_p'] = gr.Slider(0.0,1.0,value=generate_params['top_p'],step=0.01,label='top_p') - shared.gradio['top_k'] = gr.Slider(0,200,value=generate_params['top_k'],step=1,label='top_k') - shared.gradio['typical_p'] = gr.Slider(0.0,1.0,value=generate_params['typical_p'],step=0.01,label='typical_p') - with gr.Column(): - shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'],step=0.01,label='repetition_penalty') - shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'],step=0.01,label='encoder_repetition_penalty') - shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') - shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'] if shared.args.no_stream else 0, label='min_length', interactive=shared.args.no_stream) - shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + with gr.Row(): + with gr.Column(): + with gr.Box(): + gr.Markdown('Custom generation parameters') + with gr.Row(): + with gr.Column(): + shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') + shared.gradio['top_p'] = gr.Slider(0.0,1.0,value=generate_params['top_p'],step=0.01,label='top_p') + shared.gradio['top_k'] = gr.Slider(0,200,value=generate_params['top_k'],step=1,label='top_k') + shared.gradio['typical_p'] = gr.Slider(0.0,1.0,value=generate_params['typical_p'],step=0.01,label='typical_p') + with gr.Column(): + shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'],step=0.01,label='repetition_penalty') + shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'],step=0.01,label='encoder_repetition_penalty') + shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') + shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'] if shared.args.no_stream else 0, label='min_length', interactive=shared.args.no_stream) + shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + with gr.Column(): + with gr.Box(): + gr.Markdown('Contrastive search') + shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha') - with gr.Box(): - gr.Markdown('Contrastive search:') - shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha') + with gr.Box(): + gr.Markdown('Beam search (uses a lot of VRAM)') + with gr.Row(): + with gr.Column(): + shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams') + with gr.Column(): + shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') + shared.gradio['early_stopping'] = 
gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - with gr.Box(): - gr.Markdown('Beam search (uses a lot of VRAM):') - with gr.Row(): - with gr.Column(): - shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams') - with gr.Column(): - shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') - shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - - with gr.Box(): - gr.Markdown('Soft prompt') + with gr.Accordion('Soft prompt', open=False): with gr.Row(): shared.gradio['softprompts_menu'] = gr.Dropdown(choices=available_softprompts, value='None', label='Soft prompt') ui.create_refresh_button(shared.gradio['softprompts_menu'], lambda : None, lambda : {'choices': get_available_softprompts()}, 'refresh-button') @@ -320,7 +321,7 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['upload_img_me'].upload(reload_func, reload_inputs, [shared.gradio['display']]) shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) - shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.page_js}}}") + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js+ui.chat_js}}}") shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) @@ -351,7 +352,7 @@ elif shared.args.notebook: gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) shared.gradio['Stop'].click(None, None, None, cancels=gen_events) - shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.page_js}}}") + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") else: with gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: @@ -387,7 +388,7 @@ else: gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) shared.gradio['Stop'].click(None, None, None, cancels=gen_events) - shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.page_js}}}") + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") shared.gradio['interface'].queue() if shared.args.listen: From 40c9e4686c0954acbafd91af5f1db0d0c404e3e0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 13:25:28 -0300 Subject: [PATCH 17/89] Add file --- css/chat.js | 1 + 1 file changed, 1 insertion(+) create mode 100644 css/chat.js diff --git a/css/chat.js b/css/chat.js new file mode 100644 index 00000000..f45bf78a --- /dev/null +++ b/css/chat.js @@ -0,0 +1 @@ +document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto"; From 05ee323ce59f7b6edd371dcbdd6c8e63cb5cf7cf Mon Sep 17 00:00:00 2001 From: oobabooga 
<112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 13:26:32 -0300 Subject: [PATCH 18/89] Rename a file --- css/{html_chat_style.css => html_cai_style.css} | 0 modules/html_generator.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename css/{html_chat_style.css => html_cai_style.css} (100%) diff --git a/css/html_chat_style.css b/css/html_cai_style.css similarity index 100% rename from css/html_chat_style.css rename to css/html_cai_style.css diff --git a/modules/html_generator.py b/modules/html_generator.py index 71a638cb..1c8b3d1e 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -101,7 +101,7 @@ def load_html_image(paths): return '' def generate_chat_html(history, name1, name2, character): - with open(Path(__file__).resolve().parent / '../css/html_chat_style.css', 'r') as f: + with open(Path(__file__).resolve().parent / '../css/html_cai_style.css', 'r') as f: css = f.read() output = f'
' From 658849d6c33f52097eeb121d2ec0f87eae9b5c9f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 13:29:00 -0300 Subject: [PATCH 19/89] Move a checkbutton --- server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server.py b/server.py index 7d807ca2..1651517f 100644 --- a/server.py +++ b/server.py @@ -238,6 +238,7 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) with gr.Column(): shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') + shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') if shared.args.extensions is not None: extensions_module.create_extensions_block() @@ -250,8 +251,6 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu') ui.create_refresh_button(shared.gradio['character_menu'], lambda : None, lambda : {'choices': get_available_characters()}, 'refresh-button') - with gr.Row(): - shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') with gr.Row(): with gr.Tab('Chat history'): with gr.Row(): From bf812c48933b89b62cdff3e171f392c18d67b85d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 14:05:35 -0300 Subject: [PATCH 20/89] Minor fix --- modules/html_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/html_generator.py b/modules/html_generator.py index 1c8b3d1e..c8579f81 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -37,8 +37,8 @@ def process_post(post, c): return src def generate_4chan_html(f): - with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as f: - css = f.read() + with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as css_f: + css = css_f.read() posts = [] post = '' From c5f14fb9b876477b6912b51c8c8a69653cc83d89 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 14:19:28 -0300 Subject: [PATCH 21/89] Optimize the HTML generation speed --- modules/html_generator.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/modules/html_generator.py b/modules/html_generator.py index c8579f81..9942e6c9 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -14,12 +14,16 @@ from PIL import Image # This is to store the paths to the thumbnails of the profile pictures image_cache = {} -def generate_basic_html(s): - with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f: - css = f.read() +with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f: + readable_css = f.read() +with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as css_f: + _4chan_css = css_f.read() +with 
open(Path(__file__).resolve().parent / '../css/html_cai_style.css', 'r') as f: + cai_css = f.read() +def generate_basic_html(s): s = '\n'.join([f'

<p>{line}</p>' for line in s.split('\n')]) - s = f'<style>{css}</style><div class="container">{s}</div>' + s = f'<style>{readable_css}</style><div class="container">{s}</div>
' return s def process_post(post, c): @@ -37,9 +41,6 @@ def process_post(post, c): return src def generate_4chan_html(f): - with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as css_f: - css = css_f.read() - posts = [] post = '' c = -2 @@ -66,7 +67,7 @@ def generate_4chan_html(f): posts[i] = f'
{posts[i]}
\n' output = '' - output += f'
' + output += f'
' for post in posts: output += post output += '
' @@ -101,10 +102,7 @@ def load_html_image(paths): return '' def generate_chat_html(history, name1, name2, character): - with open(Path(__file__).resolve().parent / '../css/html_cai_style.css', 'r') as f: - css = f.read() - - output = f'
<style>{css}</style><div class="chat" id="chat">' + output = f'<style>{cai_css}</style><div class="chat" id="chat">
' img_bot = load_html_image([f"characters/{character}.{ext}" for ext in ['png', 'jpg', 'jpeg']] + ["img_bot.png","img_bot.jpg","img_bot.jpeg"]) img_me = load_html_image(["img_me.png", "img_me.jpg", "img_me.jpeg"]) From 348596f634312cec2a6cd93193986828f771ceb6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 15:11:16 -0300 Subject: [PATCH 22/89] Fix broken extensions --- modules/extensions.py | 10 +++++++--- server.py | 17 +++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/modules/extensions.py b/modules/extensions.py index c8de8a7b..e8541ab5 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -1,3 +1,5 @@ +import gradio as gr + import extensions import modules.shared as shared @@ -40,6 +42,8 @@ def create_extensions_block(): extension.params[param] = shared.settings[_id] # Creating the extension ui elements - for extension, name in iterator(): - if hasattr(extension, "ui"): - extension.ui() + with gr.Box(elem_id="#extensions"): + gr.Markdown("Extensions") + for extension, name in iterator(): + if hasattr(extension, "ui"): + extension.ui() diff --git a/server.py b/server.py index 1651517f..e417f146 100644 --- a/server.py +++ b/server.py @@ -240,9 +240,6 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') - if shared.args.extensions is not None: - extensions_module.create_extensions_block() - with gr.Tab("Chat settings", elem_id="chat-settings"): shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') @@ -278,6 +275,9 @@ if shared.args.chat or shared.args.cai_chat: with gr.Tab("Settings", elem_id="settings"): create_settings_menus(default_preset) + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] @@ -340,12 +340,12 @@ elif shared.args.notebook: shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) create_model_and_preset_menus() - if shared.args.extensions is not None: - extensions_module.create_extensions_block() - with gr.Tab("Settings", elem_id="settings"): create_settings_menus(default_preset) + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 
'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) @@ -368,8 +368,6 @@ else: shared.gradio['Stop'] = gr.Button('Stop') create_model_and_preset_menus() - if shared.args.extensions is not None: - extensions_module.create_extensions_block() with gr.Column(): with gr.Tab('Raw'): @@ -381,6 +379,9 @@ else: with gr.Tab("Settings", elem_id="settings"): create_settings_menus(default_preset) + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) From c1959c26ee2bf57439b1d06d8750ece72259fa2a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 16:34:31 -0300 Subject: [PATCH 23/89] Show/hide the extensions block using javascript --- css/chat.js | 1 + css/main.css | 2 +- css/main.js | 15 +++++++++++++++ modules/extensions.py | 2 +- server.py | 2 +- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/css/chat.js b/css/chat.js index f45bf78a..4a78e454 100644 --- a/css/chat.js +++ b/css/chat.js @@ -1 +1,2 @@ document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto"; +document.getElementById("extensions").style = "max-width: 800px; margin-left: auto; margin-right: auto"; diff --git a/css/main.css b/css/main.css index 8519f1c9..ca076d28 100644 --- a/css/main.css +++ b/css/main.css @@ -33,7 +33,7 @@ svg { ol li p, ul li p { display: inline-block; } -#main, #settings, #extensions, #chat-settings { +#main, #settings, #chat-settings { border: 0; } diff --git a/css/main.js b/css/main.js index ad4b5ec0..d96c8cd8 100644 --- a/css/main.js +++ b/css/main.js @@ -1,3 +1,18 @@ document.getElementById("main").parentNode.childNodes[0].style = "border: none; background-color: #8080802b; margin-bottom: 40px"; document.getElementById("main").parentNode.style = "padding: 0; margin: 0"; document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0"; + +// Get references to the elements +let main = document.getElementById('main'); +let main_parent = main.parentNode; +let extensions = document.getElementById('extensions'); + +// Add an event listener to the main element +main_parent.addEventListener('click', function(e) { + // Check if the main element is visible + if (main.offsetHeight > 0 && main.offsetWidth > 0) { + extensions.style.visibility = 'visible'; + } else { + extensions.style.visibility = 'hidden'; + } +}); diff --git a/modules/extensions.py b/modules/extensions.py index e8541ab5..89dfcda4 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -42,7 +42,7 @@ def create_extensions_block(): extension.params[param] = shared.settings[_id] # Creating the extension ui elements - with 
gr.Box(elem_id="#extensions"): + with gr.Box(elem_id="extensions"): gr.Markdown("Extensions") for extension, name in iterator(): if hasattr(extension, "ui"): diff --git a/server.py b/server.py index e417f146..6512e756 100644 --- a/server.py +++ b/server.py @@ -215,7 +215,7 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") shared.gradio['Generate'] = gr.Button('Generate') with gr.Row(): shared.gradio['Impersonate'] = gr.Button('Impersonate') From 87b84d227555894c1f53168c4774974ebffef46c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 16:39:59 -0300 Subject: [PATCH 24/89] CSS fix --- css/chat.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/css/chat.js b/css/chat.js index 4a78e454..cbdcf91e 100644 --- a/css/chat.js +++ b/css/chat.js @@ -1,2 +1,6 @@ -document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto"; -document.getElementById("extensions").style = "max-width: 800px; margin-left: auto; margin-right: auto"; +document.getElementById("main").style.setProperty("max-width", "800px"); +document.getElementById("main").style.setProperty("margin-left", "auto"); +document.getElementById("main").style.setProperty("margin-right", "auto"); +document.getElementById("extensions").style.setProperty("max-width", "800px"); +document.getElementById("extensions").style.setProperty("margin-left", "auto"); +document.getElementById("extensions").style.setProperty("margin-right", "auto"); From 3047ed8ce34ca234f6a13f6bacb49c3a66c63cff Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 16:41:38 -0300 Subject: [PATCH 25/89] CSS fix --- css/main.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.js b/css/main.js index d96c8cd8..9db3fe8b 100644 --- a/css/main.js +++ b/css/main.js @@ -11,8 +11,8 @@ let extensions = document.getElementById('extensions'); main_parent.addEventListener('click', function(e) { // Check if the main element is visible if (main.offsetHeight > 0 && main.offsetWidth > 0) { - extensions.style.visibility = 'visible'; + extensions.style.display = 'block'; } else { - extensions.style.visibility = 'hidden'; + extensions.style.display = 'none'; } }); From 6a1787a5fee69c2c692dc6a333229d9ba8374348 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 16:55:40 -0300 Subject: [PATCH 26/89] CSS fixes --- css/chat.js | 4 +--- css/main.css | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/css/chat.js b/css/chat.js index cbdcf91e..e304f125 100644 --- a/css/chat.js +++ b/css/chat.js @@ -1,6 +1,4 @@ -document.getElementById("main").style.setProperty("max-width", "800px"); -document.getElementById("main").style.setProperty("margin-left", "auto"); -document.getElementById("main").style.setProperty("margin-right", "auto"); +document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto"; document.getElementById("extensions").style.setProperty("max-width", "800px"); document.getElementById("extensions").style.setProperty("margin-left", "auto"); 
document.getElementById("extensions").style.setProperty("margin-right", "auto"); diff --git a/css/main.css b/css/main.css index ca076d28..8c5f156e 100644 --- a/css/main.css +++ b/css/main.css @@ -36,4 +36,3 @@ ol li p, ul li p { #main, #settings, #chat-settings { border: 0; } - From 30281122a5bf5a2654624505b15dce568c41a5b1 Mon Sep 17 00:00:00 2001 From: awoo Date: Wed, 15 Mar 2023 23:52:46 +0300 Subject: [PATCH 27/89] KoboldAI api --- extensions/api/script.py | 82 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 extensions/api/script.py diff --git a/extensions/api/script.py b/extensions/api/script.py new file mode 100644 index 00000000..3dbf6368 --- /dev/null +++ b/extensions/api/script.py @@ -0,0 +1,82 @@ +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from threading import Thread +from modules import shared +from modules.text_generation import generate_reply, encode +import json + +params = { + 'port': 5000, +} + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + if self.path == '/api/v1/model': + self.send_response(200) + self.end_headers() + response = json.dumps({ + 'result': shared.model_name + }) + + self.wfile.write(response.encode('utf-8')) + else: + self.send_error(404) + + def do_POST(self): + content_length = int(self.headers['Content-Length']) + body = json.loads(self.rfile.read(content_length).decode('utf-8')) + + if self.path == '/api/v1/generate': + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + + prompt = body['prompt'] + prompt_lines = [l.strip() for l in prompt.split('\n')] + + max_context = body.get('max_context_length', 2048) + + while len(prompt_lines) >= 0 and len(encode('\n'.join(prompt_lines))) > max_context: + prompt_lines.pop(0) + + prompt = '\n'.join(prompt_lines) + + generator = generate_reply( + question = prompt, + max_new_tokens = body.get('max_length', 200), + do_sample=True, + temperature=body.get('temperature', 0.5), + top_p=body.get('top_p', 1), + typical_p=body.get('typical', 1), + repetition_penalty=body.get('rep_pen', 1.1), + encoder_repetition_penalty=1, + top_k=body.get('top_k', 0), + min_length=0, + no_repeat_ngram_size=0, + num_beams=1, + penalty_alpha=0, + length_penalty=1, + early_stopping=False, + ) + + answer = '' + for a in generator: + answer = a[0] + + response = json.dumps({ + 'results': [{ + 'text': answer[len(prompt):] + }] + }) + self.wfile.write(response.encode('utf-8')) + else: + self.send_error(404) + + +def run_server(): + server_addr = ('0.0.0.0' if shared.args.listen else '127.0.0.1', params['port']) + server = ThreadingHTTPServer(server_addr, Handler) + print(f'Starting KoboldAI compatible api at http://{server_addr[0]}:{server_addr[1]}/api') + server.serve_forever() + +def ui(): + Thread(target=run_server, daemon=True).start() \ No newline at end of file From 67d62475dca6b7b08a1f3479d6c454176843b540 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 18:56:26 -0300 Subject: [PATCH 28/89] Further reorganize chat UI --- server.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/server.py b/server.py index 6512e756..dac613f5 100644 --- a/server.py +++ b/server.py @@ -231,16 +231,7 @@ if shared.args.chat or shared.args.cai_chat: create_model_and_preset_menus() - with gr.Box(): - with gr.Row(): - with gr.Column(): - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], 
maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) - with gr.Column(): - shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') - shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') - - with gr.Tab("Chat settings", elem_id="chat-settings"): + with gr.Tab("Character", elem_id="chat-settings"): shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') @@ -273,6 +264,16 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['upload_img_tavern'] = gr.File(type='binary', file_types=['image']) with gr.Tab("Settings", elem_id="settings"): + with gr.Box(): + gr.Markdown("Chat parameters") + with gr.Row(): + with gr.Column(): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) + with gr.Column(): + shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') + shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') + create_settings_menus(default_preset) if shared.args.extensions is not None: From 89883a399c6fc80ca2c811c9ac877593d462ad0f Mon Sep 17 00:00:00 2001 From: jfryton <35437877+jfryton@users.noreply.github.com> Date: Wed, 15 Mar 2023 18:20:21 -0400 Subject: [PATCH 29/89] Create WSL.md guide for setting up WSL Ubuntu Quick start guide for Windows Subsystem for Linux (Ubuntu), including port forwarding to enable local network webui access. --- docs/WSL.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 docs/WSL.md diff --git a/docs/WSL.md b/docs/WSL.md new file mode 100644 index 00000000..5c250de6 --- /dev/null +++ b/docs/WSL.md @@ -0,0 +1,71 @@ +# Windows Subsystem for Linux (Ubuntu) Installation Guide + +Here's an easy-to-follow, step-by-step guide for installing Windows Subsystem for Linux (WSL) with Ubuntu on Windows 10/11: + +## Step 1: Enable WSL + +1. Press the Windows key + X and click on "Windows PowerShell (Admin)" or "Windows Terminal (Admin)" to open PowerShell or Terminal with administrator privileges. +2. 
In the PowerShell window, type the following command and press Enter: + +``` +wsl --install +``` + +If this command doesn't work, you can enable WSL with the following command for Windows 10: + +``` +wsl --set-default-version 1 +``` + +For Windows 11, you can use: + +``` +wsl --set-default-version 2 +``` + +You may be prompted to restart your computer. If so, save your work and restart. + +## Step 2: Install Ubuntu + +1. Open the Microsoft Store. +2. Search for "Ubuntu" in the search bar. +3. Choose the desired Ubuntu version (e.g., Ubuntu 20.04 LTS) and click "Get" or "Install" to download and install the Ubuntu app. +4. Once the installation is complete, click "Launch" or search for "Ubuntu" in the Start menu and open the app. + +## Step 3: Set up Ubuntu + +1. When you first launch the Ubuntu app, it will take a few minutes to set up. Be patient as it installs the necessary files and sets up your environment. +2. Once the setup is complete, you will be prompted to create a new UNIX username and password. Choose a username and password, and make sure to remember them, as you will need them for future administrative tasks within the Ubuntu environment. + +## Step 4: Update and upgrade packages + +1. After setting up your username and password, it's a good idea to update and upgrade your Ubuntu system. Run the following commands in the Ubuntu terminal: + +``` +sudo apt update +sudo apt upgrade +``` + +2. Enter your password when prompted. This will update the package list and upgrade any outdated packages. + +Congratulations! You have now installed WSL with Ubuntu on your Windows 10/11 system. You can use the Ubuntu terminal for various tasks, like running Linux commands, installing packages, or managing files. + +You can launch your WSL Ubuntu installation by selecting the Ubuntu app (like any other program installed on your computer) or typing 'ubuntu' into Powershell or Terminal. + +## Step 5: Proceed with Linux instructions + +1. You can now follow the Linux setup instructions. If you receive any error messages about a missing tool or package, just install them using apt: + +``` +sudo apt install [missing package] +``` + +## Bonus: Port Forwarding + +By default, you won't be able to access the webui from another device on your local network. You will need to setup the appropriate port forwarding using the following command (using PowerShell or Terminal with administrator privileges). + +``` +netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=localhost connectport=7860 +``` + +If you face any issues or need to troubleshoot, you can always refer to the official Microsoft documentation for WSL: https://docs.microsoft.com/en-us/windows/wsl/ From 055edc7fddfc83b5858e72590bed85af2e560010 Mon Sep 17 00:00:00 2001 From: jfryton <35437877+jfryton@users.noreply.github.com> Date: Wed, 15 Mar 2023 18:21:14 -0400 Subject: [PATCH 30/89] Update WSL.md --- docs/WSL.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/WSL.md b/docs/WSL.md index 5c250de6..8297e6bf 100644 --- a/docs/WSL.md +++ b/docs/WSL.md @@ -60,6 +60,8 @@ You can launch your WSL Ubuntu installation by selecting the Ubuntu app (like an sudo apt install [missing package] ``` +If you face any issues or need to troubleshoot, you can always refer to the official Microsoft documentation for WSL: https://docs.microsoft.com/en-us/windows/wsl/ + ## Bonus: Port Forwarding By default, you won't be able to access the webui from another device on your local network. 
You will need to setup the appropriate port forwarding using the following command (using PowerShell or Terminal with administrator privileges). @@ -68,4 +70,4 @@ By default, you won't be able to access the webui from another device on your lo netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=localhost connectport=7860 ``` -If you face any issues or need to troubleshoot, you can always refer to the official Microsoft documentation for WSL: https://docs.microsoft.com/en-us/windows/wsl/ + From 66256ac1ddd2456028ada6643081c6e4b19dfdef Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 19:31:27 -0300 Subject: [PATCH 31/89] Make the "no GPU has been detected" message more descriptive --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index f4bb11fd..a934514b 100644 --- a/modules/models.py +++ b/modules/models.py @@ -98,7 +98,7 @@ def load_model(model_name): command = "AutoModelForCausalLM.from_pretrained" params = ["low_cpu_mem_usage=True"] if not shared.args.cpu and not torch.cuda.is_available(): - print("Warning: no GPU has been detected.\nFalling back to CPU mode.\n") + print("Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been detected.\nFalling back to CPU mode.\n") shared.args.cpu = True if shared.args.cpu: From 09045e4bdbddeedb2d35378b647312c7de20b49e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 19:42:06 -0300 Subject: [PATCH 32/89] Add WSL guide --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c9834558..180c8629 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,8 @@ conda install pytorch torchvision torchaudio git -c pytorch ``` > **Note** -> 1. If you are on Windows, it may be easier to run the commands above in a WSL environment. The performance may also be better. +> 1. If you are on Windows, it may be easier to run the commands above in a WSL environment. The performance may also be better. A full guide can be found here: [Windows Subsystem for Linux (Ubuntu) Installation Guide +](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). > 2. For a more detailed, user-contributed guide, see: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). ## Installation option 2: one-click installers From e9e76bb06cc3552b1e52984ec4f1682673102115 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 19:42:29 -0300 Subject: [PATCH 33/89] Delete WSL.md --- docs/WSL.md | 73 ----------------------------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 docs/WSL.md diff --git a/docs/WSL.md b/docs/WSL.md deleted file mode 100644 index 8297e6bf..00000000 --- a/docs/WSL.md +++ /dev/null @@ -1,73 +0,0 @@ -# Windows Subsystem for Linux (Ubuntu) Installation Guide - -Here's an easy-to-follow, step-by-step guide for installing Windows Subsystem for Linux (WSL) with Ubuntu on Windows 10/11: - -## Step 1: Enable WSL - -1. Press the Windows key + X and click on "Windows PowerShell (Admin)" or "Windows Terminal (Admin)" to open PowerShell or Terminal with administrator privileges. -2. 
In the PowerShell window, type the following command and press Enter: - -``` -wsl --install -``` - -If this command doesn't work, you can enable WSL with the following command for Windows 10: - -``` -wsl --set-default-version 1 -``` - -For Windows 11, you can use: - -``` -wsl --set-default-version 2 -``` - -You may be prompted to restart your computer. If so, save your work and restart. - -## Step 2: Install Ubuntu - -1. Open the Microsoft Store. -2. Search for "Ubuntu" in the search bar. -3. Choose the desired Ubuntu version (e.g., Ubuntu 20.04 LTS) and click "Get" or "Install" to download and install the Ubuntu app. -4. Once the installation is complete, click "Launch" or search for "Ubuntu" in the Start menu and open the app. - -## Step 3: Set up Ubuntu - -1. When you first launch the Ubuntu app, it will take a few minutes to set up. Be patient as it installs the necessary files and sets up your environment. -2. Once the setup is complete, you will be prompted to create a new UNIX username and password. Choose a username and password, and make sure to remember them, as you will need them for future administrative tasks within the Ubuntu environment. - -## Step 4: Update and upgrade packages - -1. After setting up your username and password, it's a good idea to update and upgrade your Ubuntu system. Run the following commands in the Ubuntu terminal: - -``` -sudo apt update -sudo apt upgrade -``` - -2. Enter your password when prompted. This will update the package list and upgrade any outdated packages. - -Congratulations! You have now installed WSL with Ubuntu on your Windows 10/11 system. You can use the Ubuntu terminal for various tasks, like running Linux commands, installing packages, or managing files. - -You can launch your WSL Ubuntu installation by selecting the Ubuntu app (like any other program installed on your computer) or typing 'ubuntu' into Powershell or Terminal. - -## Step 5: Proceed with Linux instructions - -1. You can now follow the Linux setup instructions. If you receive any error messages about a missing tool or package, just install them using apt: - -``` -sudo apt install [missing package] -``` - -If you face any issues or need to troubleshoot, you can always refer to the official Microsoft documentation for WSL: https://docs.microsoft.com/en-us/windows/wsl/ - -## Bonus: Port Forwarding - -By default, you won't be able to access the webui from another device on your local network. You will need to setup the appropriate port forwarding using the following command (using PowerShell or Terminal with administrator privileges). 
- -``` -netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=localhost connectport=7860 -``` - - From 0552ab2e9f879e296e6946244db0b7a5755b30ac Mon Sep 17 00:00:00 2001 From: awoo Date: Thu, 16 Mar 2023 02:00:16 +0300 Subject: [PATCH 34/89] flask_cloudflared for shared tunnels --- extensions/api/requirements.txt | 1 + extensions/api/script.py | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 extensions/api/requirements.txt diff --git a/extensions/api/requirements.txt b/extensions/api/requirements.txt new file mode 100644 index 00000000..ad788ab8 --- /dev/null +++ b/extensions/api/requirements.txt @@ -0,0 +1 @@ +flask_cloudflared==0.0.12 \ No newline at end of file diff --git a/extensions/api/script.py b/extensions/api/script.py index 3dbf6368..53e47f3f 100644 --- a/extensions/api/script.py +++ b/extensions/api/script.py @@ -75,7 +75,15 @@ class Handler(BaseHTTPRequestHandler): def run_server(): server_addr = ('0.0.0.0' if shared.args.listen else '127.0.0.1', params['port']) server = ThreadingHTTPServer(server_addr, Handler) - print(f'Starting KoboldAI compatible api at http://{server_addr[0]}:{server_addr[1]}/api') + if shared.args.share: + try: + from flask_cloudflared import _run_cloudflared + public_url = _run_cloudflared(params['port'], params['port'] + 1) + print(f'Starting KoboldAI compatible api at {public_url}/api') + except ImportError: + print('You should install flask_cloudflared manually') + else: + print(f'Starting KoboldAI compatible api at http://{server_addr[0]}:{server_addr[1]}/api') server.serve_forever() def ui(): From 445ebf0ba80b7cf55e7f793ecfa6a9f3ed0f20aa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 20:06:46 -0300 Subject: [PATCH 35/89] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 180c8629..992d96ee 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Open a terminal and copy and paste these commands one at a time ([install conda] ``` conda create -n textgen conda activate textgen -conda install torchvision torchaudio pytorch-cuda=11.7 git -c pytorch -c nvidia +conda install torchvision=0.14.1 torchaudio=0.13.1 pytorch-cuda=11.7 git -c pytorch -c nvidia git clone https://github.com/oobabooga/text-generation-webui cd text-generation-webui pip install -r requirements.txt @@ -51,13 +51,13 @@ The third line assumes that you have an NVIDIA GPU. 
* If you have an AMD GPU, replace the third command with this one: ``` -pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 +pip3 install torch torchvision=0.14.1 torchaudio=0.13.1 --extra-index-url https://download.pytorch.org/whl/rocm5.2 ``` * If you are running it in CPU mode, replace the third command with this one: ``` -conda install pytorch torchvision torchaudio git -c pytorch +conda install pytorch torchvision=0.14.1 torchaudio=0.13.1 git -c pytorch ``` > **Note** From ffb898608b03385643f3f556b00b47a797fe0651 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 20:44:34 -0300 Subject: [PATCH 36/89] Mini refactor --- server.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/server.py b/server.py index dac613f5..a0e52d74 100644 --- a/server.py +++ b/server.py @@ -206,8 +206,8 @@ title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' -if shared.args.chat or shared.args.cai_chat: - with gr.Blocks(css=ui.css+ui.chat_css, analytics_enabled=False, title=title) as shared.gradio['interface']: +with gr.Blocks(css=ui.css if not any((shared.args.chat, shared.args.cai_chat)) else ui.css+ui.chat_css, analytics_enabled=False, title=title) as shared.gradio['interface']: + if shared.args.chat or shared.args.cai_chat: with gr.Tab("Text generation", elem_id="main"): if shared.args.cai_chat: shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) @@ -276,9 +276,6 @@ if shared.args.chat or shared.args.cai_chat: create_settings_menus(default_preset) - if shared.args.extensions is not None: - extensions_module.create_extensions_block() - function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] @@ -325,8 +322,7 @@ if shared.args.chat or shared.args.cai_chat: shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) -elif shared.args.notebook: - with gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: + elif shared.args.notebook: with gr.Tab("Text generation", elem_id="main"): with gr.Tab('Raw'): shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) @@ -344,9 +340,6 @@ elif shared.args.notebook: with gr.Tab("Settings", elem_id="settings"): create_settings_menus(default_preset) - if shared.args.extensions is not None: - extensions_module.create_extensions_block() - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 
'early_stopping']] output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) @@ -354,8 +347,7 @@ elif shared.args.notebook: shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") -else: - with gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: + else: with gr.Tab("Text generation", elem_id="main"): with gr.Row(): with gr.Column(): @@ -380,9 +372,6 @@ else: with gr.Tab("Settings", elem_id="settings"): create_settings_menus(default_preset) - if shared.args.extensions is not None: - extensions_module.create_extensions_block() - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) @@ -391,6 +380,9 @@ else: shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + shared.gradio['interface'].queue() if shared.args.listen: shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) From 4d64a570925f49992dba48573f660f07e13a5f16 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 23:29:56 -0300 Subject: [PATCH 37/89] Add Interface mode tab --- css/main.css | 2 +- modules/extensions.py | 20 ++- modules/shared.py | 3 + server.py | 363 +++++++++++++++++++++++------------------- 4 files changed, 213 insertions(+), 175 deletions(-) diff --git a/css/main.css b/css/main.css index 8c5f156e..f5ccfe94 100644 --- a/css/main.css +++ b/css/main.css @@ -33,6 +33,6 @@ svg { ol li p, ul li p { display: inline-block; } -#main, #settings, #chat-settings { +#main, #parameters, #chat-settings, #interface-mode { border: 0; } diff --git a/modules/extensions.py b/modules/extensions.py index 89dfcda4..836fbc60 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -11,9 +11,12 @@ def load_extensions(): for i, name in enumerate(shared.args.extensions): if name in available_extensions: print(f'Loading the extension "{name}"... 
', end='') - exec(f"import extensions.{name}.script") - state[name] = [True, i] - print('Ok.') + try: + exec(f"import extensions.{name}.script") + state[name] = [True, i] + print('Ok.') + except: + print('Fail.') # This iterator returns the extensions in the order specified in the command-line def iterator(): @@ -42,8 +45,9 @@ def create_extensions_block(): extension.params[param] = shared.settings[_id] # Creating the extension ui elements - with gr.Box(elem_id="extensions"): - gr.Markdown("Extensions") - for extension, name in iterator(): - if hasattr(extension, "ui"): - extension.ui() + if len(state) > 0: + with gr.Box(elem_id="extensions"): + gr.Markdown("Extensions") + for extension, name in iterator(): + if hasattr(extension, "ui"): + extension.ui() diff --git a/modules/shared.py b/modules/shared.py index ea2eb50b..da5efbd3 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -19,6 +19,9 @@ gradio = {} # Generation input parameters input_params = [] +# For restarting the interface +need_restart = False + settings = { 'max_new_tokens': 200, 'max_new_tokens_min': 1, diff --git a/server.py b/server.py index a0e52d74..7af8bd3e 100644 --- a/server.py +++ b/server.py @@ -176,8 +176,6 @@ else: shared.args.extensions = shared.args.extensions or [] if extension not in shared.args.extensions: shared.args.extensions.append(extension) -if shared.args.extensions is not None and len(shared.args.extensions) > 0: - extensions_module.load_extensions() # Default model if shared.args.model is not None: @@ -199,196 +197,229 @@ else: shared.model, shared.tokenizer = load_model(shared.model_name) # Default UI settings -gen_events = [] default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' -with gr.Blocks(css=ui.css if not any((shared.args.chat, shared.args.cai_chat)) else ui.css+ui.chat_css, analytics_enabled=False, title=title) as shared.gradio['interface']: - if shared.args.chat or shared.args.cai_chat: - with gr.Tab("Text generation", elem_id="main"): - if shared.args.cai_chat: - shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) - else: - shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) - shared.gradio['textbox'] = gr.Textbox(label='Input') - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") - shared.gradio['Generate'] = gr.Button('Generate') - with gr.Row(): - shared.gradio['Impersonate'] = gr.Button('Impersonate') - shared.gradio['Regenerate'] = gr.Button('Regenerate') - with gr.Row(): - shared.gradio['Copy last reply'] = gr.Button('Copy last reply') - shared.gradio['Replace last reply'] = gr.Button('Replace last reply') - shared.gradio['Remove last'] = gr.Button('Remove last') +def create_interface(): - shared.gradio['Clear history'] = gr.Button('Clear history') - shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) - shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) + gen_events = [] + if 
shared.args.extensions is not None and len(shared.args.extensions) > 0: + extensions_module.load_extensions() - create_model_and_preset_menus() + with gr.Blocks(css=ui.css if not any((shared.args.chat, shared.args.cai_chat)) else ui.css+ui.chat_css, analytics_enabled=False, title=title) as shared.gradio['interface']: + if shared.args.chat or shared.args.cai_chat: + with gr.Tab("Text generation", elem_id="main"): + if shared.args.cai_chat: + shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) + else: + shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) + shared.gradio['textbox'] = gr.Textbox(label='Input') + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Row(): + shared.gradio['Impersonate'] = gr.Button('Impersonate') + shared.gradio['Regenerate'] = gr.Button('Regenerate') + with gr.Row(): + shared.gradio['Copy last reply'] = gr.Button('Copy last reply') + shared.gradio['Replace last reply'] = gr.Button('Replace last reply') + shared.gradio['Remove last'] = gr.Button('Remove last') - with gr.Tab("Character", elem_id="chat-settings"): - shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') - shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') - shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') - with gr.Row(): - shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu') - ui.create_refresh_button(shared.gradio['character_menu'], lambda : None, lambda : {'choices': get_available_characters()}, 'refresh-button') + shared.gradio['Clear history'] = gr.Button('Clear history') + shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) + shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) - with gr.Row(): - with gr.Tab('Chat history'): + create_model_and_preset_menus() + + with gr.Tab("Character", elem_id="chat-settings"): + shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') + shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') + with gr.Row(): + shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu') + ui.create_refresh_button(shared.gradio['character_menu'], lambda : None, lambda : {'choices': get_available_characters()}, 'refresh-button') + + with gr.Row(): + with gr.Tab('Chat history'): + with gr.Row(): + with gr.Column(): + gr.Markdown('Upload') + shared.gradio['upload_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt']) + with gr.Column(): + gr.Markdown('Download') + shared.gradio['download'] = gr.File() + shared.gradio['download_button'] = gr.Button(value='Click me') + with gr.Tab('Upload character'): + with gr.Row(): + with gr.Column(): + gr.Markdown('1. Select the JSON file') + shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json']) + with gr.Column(): + gr.Markdown('2. 
Select your character\'s profile picture (optional)') + shared.gradio['upload_img_bot'] = gr.File(type='binary', file_types=['image']) + shared.gradio['Upload character'] = gr.Button(value='Submit') + with gr.Tab('Upload your profile picture'): + shared.gradio['upload_img_me'] = gr.File(type='binary', file_types=['image']) + with gr.Tab('Upload TavernAI Character Card'): + shared.gradio['upload_img_tavern'] = gr.File(type='binary', file_types=['image']) + + with gr.Tab("Parameters", elem_id="parameters"): + with gr.Box(): + gr.Markdown("Chat parameters") with gr.Row(): with gr.Column(): - gr.Markdown('Upload') - shared.gradio['upload_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt']) + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) with gr.Column(): - gr.Markdown('Download') - shared.gradio['download'] = gr.File() - shared.gradio['download_button'] = gr.Button(value='Click me') - with gr.Tab('Upload character'): - with gr.Row(): - with gr.Column(): - gr.Markdown('1. Select the JSON file') - shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json']) - with gr.Column(): - gr.Markdown('2. Select your character\'s profile picture (optional)') - shared.gradio['upload_img_bot'] = gr.File(type='binary', file_types=['image']) - shared.gradio['Upload character'] = gr.Button(value='Submit') - with gr.Tab('Upload your profile picture'): - shared.gradio['upload_img_me'] = gr.File(type='binary', file_types=['image']) - with gr.Tab('Upload TavernAI Character Card'): - shared.gradio['upload_img_tavern'] = gr.File(type='binary', file_types=['image']) + shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') + shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') - with gr.Tab("Settings", elem_id="settings"): - with gr.Box(): - gr.Markdown("Chat parameters") + create_settings_menus(default_preset) + + function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] + + gen_events.append(shared.gradio['Generate'].click(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + 
gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream)) + shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events) + + shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream) + shared.gradio['Replace last reply'].click(chat.replace_last_reply, [shared.gradio['textbox'], shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'], show_progress=shared.args.no_stream) + + # Clear history with confirmation + clear_arr = [shared.gradio[k] for k in ['Clear history-confirm', 'Clear history', 'Clear history-cancel']] + shared.gradio['Clear history'].click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, clear_arr) + shared.gradio['Clear history-confirm'].click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) + shared.gradio['Clear history-confirm'].click(chat.clear_chat_log, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display']) + shared.gradio['Clear history-cancel'].click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) + + shared.gradio['Remove last'].click(chat.remove_last_message, [shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['display'], shared.gradio['textbox']], show_progress=False) + shared.gradio['download_button'].click(chat.save_history, inputs=[], outputs=[shared.gradio['download']]) + shared.gradio['Upload character'].click(chat.upload_character, [shared.gradio['upload_json'], shared.gradio['upload_img_bot']], [shared.gradio['character_menu']]) + + # Clearing stuff and saving the history + for i in ['Generate', 'Regenerate', 'Replace last reply']: + shared.gradio[i].click(lambda x: '', shared.gradio['textbox'], shared.gradio['textbox'], show_progress=False) + shared.gradio[i].click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + shared.gradio['Clear history-confirm'].click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + shared.gradio['textbox'].submit(lambda x: '', shared.gradio['textbox'], shared.gradio['textbox'], show_progress=False) + shared.gradio['textbox'].submit(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + + shared.gradio['character_menu'].change(chat.load_character, [shared.gradio['character_menu'], shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['name2'], shared.gradio['context'], shared.gradio['display']]) + shared.gradio['upload_chat_history'].upload(chat.load_history, [shared.gradio['upload_chat_history'], shared.gradio['name1'], shared.gradio['name2']], []) + shared.gradio['upload_img_tavern'].upload(chat.upload_tavern_character, [shared.gradio['upload_img_tavern'], shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['character_menu']]) + shared.gradio['upload_img_me'].upload(chat.upload_your_profile_picture, [shared.gradio['upload_img_me']], []) + + reload_func = chat.redraw_html if shared.args.cai_chat else lambda : shared.history['visible'] + reload_inputs = [shared.gradio['name1'], shared.gradio['name2']] if shared.args.cai_chat else [] + shared.gradio['upload_chat_history'].upload(reload_func, reload_inputs, [shared.gradio['display']]) + shared.gradio['upload_img_me'].upload(reload_func, reload_inputs, [shared.gradio['display']]) + 
shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) + + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js+ui.chat_js}}}") + shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) + shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) + + elif shared.args.notebook: + with gr.Tab("Text generation", elem_id="main"): + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + + create_model_and_preset_menus() + with gr.Tab("Parameters", elem_id="parameters"): + create_settings_menus(default_preset) + + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] + output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] + gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) + gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) + shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + + else: + with gr.Tab("Text generation", elem_id="main"): with gr.Row(): with gr.Column(): + shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=15, label='Input') shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Row(): + with gr.Column(): + shared.gradio['Continue'] = gr.Button('Continue') + with gr.Column(): + shared.gradio['Stop'] = gr.Button('Stop') + + create_model_and_preset_menus() + with gr.Column(): - shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') - shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') + with gr.Tab('Raw'): + shared.gradio['output_textbox'] = gr.Textbox(lines=25, label='Output') + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] 
= gr.HTML() + with gr.Tab("Parameters", elem_id="parameters"): + create_settings_menus(default_preset) - create_settings_menus(default_preset) + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] + output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] + gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) + gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) + shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") - function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] + with gr.Tab("Interface mode", elem_id="interface-mode"): + def set_interface_mode(mode, choices): + shared.args.extensions = choices + for k in ["notebook", "chat", "cai_chat"]: + exec(f"shared.args.{k} = False") + if mode != "default": + exec(f"shared.args.{mode} = True") + shared.need_restart = True - gen_events.append(shared.gradio['Generate'].click(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) - gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) - gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) - gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events) + extensions = get_available_extensions() + modes = ["default", "notebook", "chat", "cai_chat"] + current_mode = "default" + for mode in modes: + if hasattr(shared.args, mode) and eval(f"shared.args.{mode}"): + current_mode = mode - shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream) - shared.gradio['Replace last reply'].click(chat.replace_last_reply, [shared.gradio['textbox'], shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'], show_progress=shared.args.no_stream) + modes_menu = gr.Dropdown(choices=modes, value=current_mode, label="Mode") + group = gr.CheckboxGroup(choices=extensions, value=shared.args.extensions, label="Available extensions") + kill = gr.Button("Apply and restart the interface") + kill.click(set_interface_mode, [modes_menu, group], 
None) + kill.click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'Reloading...
\'; setTimeout(function(){location.reload()},2000)}') - # Clear history with confirmation - clear_arr = [shared.gradio[k] for k in ['Clear history-confirm', 'Clear history', 'Clear history-cancel']] - shared.gradio['Clear history'].click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, clear_arr) - shared.gradio['Clear history-confirm'].click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) - shared.gradio['Clear history-confirm'].click(chat.clear_chat_log, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display']) - shared.gradio['Clear history-cancel'].click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) - - shared.gradio['Remove last'].click(chat.remove_last_message, [shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['display'], shared.gradio['textbox']], show_progress=False) - shared.gradio['download_button'].click(chat.save_history, inputs=[], outputs=[shared.gradio['download']]) - shared.gradio['Upload character'].click(chat.upload_character, [shared.gradio['upload_json'], shared.gradio['upload_img_bot']], [shared.gradio['character_menu']]) - - # Clearing stuff and saving the history - for i in ['Generate', 'Regenerate', 'Replace last reply']: - shared.gradio[i].click(lambda x: '', shared.gradio['textbox'], shared.gradio['textbox'], show_progress=False) - shared.gradio[i].click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) - shared.gradio['Clear history-confirm'].click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) - shared.gradio['textbox'].submit(lambda x: '', shared.gradio['textbox'], shared.gradio['textbox'], show_progress=False) - shared.gradio['textbox'].submit(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) - - shared.gradio['character_menu'].change(chat.load_character, [shared.gradio['character_menu'], shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['name2'], shared.gradio['context'], shared.gradio['display']]) - shared.gradio['upload_chat_history'].upload(chat.load_history, [shared.gradio['upload_chat_history'], shared.gradio['name1'], shared.gradio['name2']], []) - shared.gradio['upload_img_tavern'].upload(chat.upload_tavern_character, [shared.gradio['upload_img_tavern'], shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['character_menu']]) - shared.gradio['upload_img_me'].upload(chat.upload_your_profile_picture, [shared.gradio['upload_img_me']], []) - - reload_func = chat.redraw_html if shared.args.cai_chat else lambda : shared.history['visible'] - reload_inputs = [shared.gradio['name1'], shared.gradio['name2']] if shared.args.cai_chat else [] - shared.gradio['upload_chat_history'].upload(reload_func, reload_inputs, [shared.gradio['display']]) - shared.gradio['upload_img_me'].upload(reload_func, reload_inputs, [shared.gradio['display']]) - shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) - - shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js+ui.chat_js}}}") - shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) - shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) - - elif shared.args.notebook: - with gr.Tab("Text generation", elem_id="main"): - with gr.Tab('Raw'): - 
shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - - create_model_and_preset_menus() - with gr.Tab("Settings", elem_id="settings"): - create_settings_menus(default_preset) - - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] - output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] - gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) - gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(None, None, None, cancels=gen_events) - shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + # Launch the interface + shared.gradio['interface'].queue() + if shared.args.listen: + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) else: - with gr.Tab("Text generation", elem_id="main"): - with gr.Row(): - with gr.Column(): - shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=15, label='Input') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - shared.gradio['Generate'] = gr.Button('Generate') - with gr.Row(): - with gr.Column(): - shared.gradio['Continue'] = gr.Button('Continue') - with gr.Column(): - shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) - create_model_and_preset_menus() +create_interface() - with gr.Column(): - with gr.Tab('Raw'): - shared.gradio['output_textbox'] = gr.Textbox(lines=25, label='Output') - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - with gr.Tab("Settings", elem_id="settings"): - create_settings_menus(default_preset) - - shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] - output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] - gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) - 
gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) - gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) - shared.gradio['Stop'].click(None, None, None, cancels=gen_events) - shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") - - if shared.args.extensions is not None: - extensions_module.create_extensions_block() - -shared.gradio['interface'].queue() -if shared.args.listen: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) -else: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) - -# I think that I will need this later while True: time.sleep(0.5) + if shared.need_restart: + shared.need_restart = False + shared.gradio['interface'].close() + create_interface() From 599d3139fd71d9a5647374043b33f2d5221f2642 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 23:34:08 -0300 Subject: [PATCH 38/89] Increase the reload timeout a bit --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 7af8bd3e..94fec6b9 100644 --- a/server.py +++ b/server.py @@ -403,7 +403,7 @@ def create_interface(): group = gr.CheckboxGroup(choices=extensions, value=shared.args.extensions, label="Available extensions") kill = gr.Button("Apply and restart the interface") kill.click(set_interface_mode, [modes_menu, group], None) - kill.click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...\'; setTimeout(function(){location.reload()},2000)}') + kill.click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'Reloading...
\'; setTimeout(function(){location.reload()},2500)}') if shared.args.extensions is not None: extensions_module.create_extensions_block() From 25a00eaf9809b5dd69d8da34f70bcb41ad2254ae Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 15 Mar 2023 23:43:35 -0300 Subject: [PATCH 39/89] Add "Experimental" warning --- server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server.py b/server.py index 94fec6b9..1e85e4f6 100644 --- a/server.py +++ b/server.py @@ -399,6 +399,7 @@ def create_interface(): if hasattr(shared.args, mode) and eval(f"shared.args.{mode}"): current_mode = mode + gr.Markdown("*Experimental*") modes_menu = gr.Dropdown(choices=modes, value=current_mode, label="Mode") group = gr.CheckboxGroup(choices=extensions, value=shared.args.extensions, label="Available extensions") kill = gr.Button("Apply and restart the interface") From a577fb1077ebbf2631276c5fc408fe4aff93d208 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 00:46:59 -0300 Subject: [PATCH 40/89] Keep GALACTICA special tokens (#300) --- modules/text_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index a29b987f..e5b4ad91 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -38,7 +38,7 @@ def encode(prompt, tokens_to_generate=0, add_special_tokens=True): def decode(output_ids): # Open Assistant relies on special tokens like <|endoftext|> - if re.match('oasst-*', shared.model_name.lower()): + if re.match('(oasst|galactica)-*', shared.model_name.lower()): return shared.tokenizer.decode(output_ids, skip_special_tokens=False) else: reply = shared.tokenizer.decode(output_ids, skip_special_tokens=True) From 1c378965e191558e8808c7b9afde9753a0a9e343 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 10:18:34 -0300 Subject: [PATCH 41/89] Remove unused imports --- extensions/elevenlabs_tts/script.py | 4 ++-- modules/models.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/extensions/elevenlabs_tts/script.py b/extensions/elevenlabs_tts/script.py index 90d61efc..b8171063 100644 --- a/extensions/elevenlabs_tts/script.py +++ b/extensions/elevenlabs_tts/script.py @@ -1,8 +1,8 @@ from pathlib import Path import gradio as gr -from elevenlabslib import * -from elevenlabslib.helpers import * +from elevenlabslib import ElevenLabsUser +from elevenlabslib.helpers import save_bytes_to_path params = { 'activate': True, diff --git a/modules/models.py b/modules/models.py index a934514b..2a7dca62 100644 --- a/modules/models.py +++ b/modules/models.py @@ -16,8 +16,7 @@ transformers.logging.set_verbosity_error() local_rank = None if shared.args.flexgen: - from flexgen.flex_opt import (CompressionConfig, ExecutionEnv, OptLM, - Policy, str2bool) + from flexgen.flex_opt import CompressionConfig, ExecutionEnv, OptLM, Policy if shared.args.deepspeed: import deepspeed From d54f3f4a34b7a1995a8ac98c9a094816386d165e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 10:19:00 -0300 Subject: [PATCH 42/89] Add no-stream checkbox to the interface --- server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index 1e85e4f6..8deb1755 100644 --- a/server.py +++ b/server.py @@ -384,8 +384,9 @@ def create_interface(): shared.gradio['interface'].load(None, 
None, None, _js=f"() => {{{ui.main_js}}}") with gr.Tab("Interface mode", elem_id="interface-mode"): - def set_interface_mode(mode, choices): + def set_interface_mode(mode, choices, stream): shared.args.extensions = choices + shared.args.no_stream = stream for k in ["notebook", "chat", "cai_chat"]: exec(f"shared.args.{k} = False") if mode != "default": @@ -402,8 +403,11 @@ def create_interface(): gr.Markdown("*Experimental*") modes_menu = gr.Dropdown(choices=modes, value=current_mode, label="Mode") group = gr.CheckboxGroup(choices=extensions, value=shared.args.extensions, label="Available extensions") + + with gr.Box(): + stream = gr.Checkbox(label='no-stream', value=shared.args.no_stream) kill = gr.Button("Apply and restart the interface") - kill.click(set_interface_mode, [modes_menu, group], None) + kill.click(set_interface_mode, [modes_menu, group, stream], None) kill.click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...
\'; setTimeout(function(){location.reload()},2500)}') if shared.args.extensions is not None: From 23a5e886e1aa6849e0819256c3bb4b2bf7d8358e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 11:16:17 -0300 Subject: [PATCH 43/89] The LLaMA PR has been merged into transformers https://github.com/huggingface/transformers/pull/21955 The tokenizer class has been changed from "LLaMATokenizer" to "LlamaTokenizer" It is necessary to edit this change in every tokenizer_config.json that you had for LLaMA so far. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4994ca1a..b9a9b385 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ rwkv==0.4.2 safetensors==0.3.0 sentencepiece tqdm -git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176 +git+https://github.com/huggingface/transformers From 83cb20aad85d3c35f8cc88f86183fa5320d3ec9e Mon Sep 17 00:00:00 2001 From: awoo Date: Thu, 16 Mar 2023 18:42:53 +0300 Subject: [PATCH 44/89] Add support for --gpu-memory witn --load-in-8bit --- modules/models.py | 63 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/modules/models.py b/modules/models.py index 2a7dca62..ea5fe757 100644 --- a/modules/models.py +++ b/modules/models.py @@ -7,7 +7,8 @@ from pathlib import Path import numpy as np import torch import transformers -from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig +from accelerate import infer_auto_device_map, init_empty_weights, load_checkpoint_and_dispatch import modules.shared as shared @@ -94,39 +95,61 @@ def load_model(model_name): # Custom else: - command = "AutoModelForCausalLM.from_pretrained" - params = ["low_cpu_mem_usage=True"] + params = {"low_cpu_mem_usage": True} if not shared.args.cpu and not torch.cuda.is_available(): print("Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been detected.\nFalling back to CPU mode.\n") shared.args.cpu = True if shared.args.cpu: - params.append("low_cpu_mem_usage=True") - params.append("torch_dtype=torch.float32") + params["torch_dtype"] = torch.float32 else: - params.append("device_map='auto'") - params.append("load_in_8bit=True" if shared.args.load_in_8bit else "torch_dtype=torch.bfloat16" if shared.args.bf16 else "torch_dtype=torch.float16") + params["device_map"] = 'auto' + if shared.args.load_in_8bit: + params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) + elif shared.args.bf16: + params["torch_dtype"] = torch.bfloat16 + else: + params["torch_dtype"] = torch.float16 if shared.args.gpu_memory: memory_map = shared.args.gpu_memory - max_memory = f"max_memory={{0: '{memory_map[0]}GiB'" + max_memory = { 0: f'{memory_map[0]}GiB' } for i in range(1, len(memory_map)): - max_memory += (f", {i}: '{memory_map[i]}GiB'") - max_memory += (f", 'cpu': '{shared.args.cpu_memory or '99'}GiB'}}") - params.append(max_memory) - elif not shared.args.load_in_8bit: - total_mem = (torch.cuda.get_device_properties(0).total_memory/(1024*1024)) - suggestion = round((total_mem-1000)/1000)*1000 - if total_mem-suggestion < 800: + max_memory[i] = f'{memory_map[i]}GiB' + max_memory['cpu'] = f'{shared.args.cpu_memory or 99}GiB' + params['max_memory'] = max_memory + else: + total_mem = 
(torch.cuda.get_device_properties(0).total_memory / (1024 * 1024)) + suggestion = round((total_mem - 1000) / 1000) * 1000 + if total_mem - suggestion < 800: suggestion -= 1000 suggestion = int(round(suggestion/1000)) print(f"\033[1;32;1mAuto-assiging --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors.\nYou can manually set other values.\033[0;37;0m") - params.append(f"max_memory={{0: '{suggestion}GiB', 'cpu': '{shared.args.cpu_memory or '99'}GiB'}}") - if shared.args.disk: - params.append(f"offload_folder='{shared.args.disk_cache_dir}'") + + max_memory = { + 0: f'{suggestion}GiB', + 'cpu': f'{shared.args.cpu_memory or 99}GiB' + } + params['max_memory'] = max_memory - command = f"{command}(Path(f'models/{shared.model_name}'), {', '.join(set(params))})" - model = eval(command) + if shared.args.disk: + params["offload_folder"] = shared.args.disk_cache_dir + + checkpoint = Path(f'models/{shared.model_name}') + + if shared.args.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto': + config = AutoConfig.from_pretrained(checkpoint) + with init_empty_weights(): + model = AutoModelForCausalLM.from_config(config) + model.tie_weights() + params['device_map'] = infer_auto_device_map( + model, + dtype=torch.int8, + max_memory=params['max_memory'], + no_split_module_classes = model._no_split_modules + ) + + model = AutoModelForCausalLM.from_pretrained(checkpoint, **params) # Loading the tokenizer if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path("models/gpt-j-6B/").exists(): From 38d701765759e079a958586a7ef9a1694d12d978 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 12:44:03 -0300 Subject: [PATCH 45/89] Add all command-line flags to "Interface mode" --- server.py | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/server.py b/server.py index 8deb1755..2024fd42 100644 --- a/server.py +++ b/server.py @@ -159,6 +159,24 @@ def create_settings_menus(default_preset): shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) +def set_interface_arguments(interface_mode, extensions, cmd_active): + modes = ["default", "notebook", "chat", "cai_chat"] + cmd_list = vars(shared.args) + cmd_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes] + + shared.args.extensions = extensions + for k in modes[1:]: + exec(f"shared.args.{k} = False") + if interface_mode != "default": + exec(f"shared.args.{interface_mode} = True") + + for k in cmd_list: + exec(f"shared.args.{k} = False") + for k in cmd_active: + exec(f"shared.args.{k} = True") + + shared.need_restart = True + available_models = get_available_models() available_presets = get_available_presets() available_characters = get_available_characters() @@ -384,31 +402,24 @@ def create_interface(): shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") with gr.Tab("Interface mode", elem_id="interface-mode"): - def set_interface_mode(mode, choices, stream): - shared.args.extensions = choices - shared.args.no_stream = stream - for k in ["notebook", "chat", "cai_chat"]: - exec(f"shared.args.{k} = False") - if mode != "default": - exec(f"shared.args.{mode} = True") - 
shared.need_restart = True - - extensions = get_available_extensions() modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" - for mode in modes: - if hasattr(shared.args, mode) and eval(f"shared.args.{mode}"): + for mode in modes[1:]: + if eval(f"shared.args.{mode}"): current_mode = mode + break + cmd_list = vars(shared.args) + cmd_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes] + active_cmd_list = [k for k in cmd_list if vars(shared.args)[k]] gr.Markdown("*Experimental*") - modes_menu = gr.Dropdown(choices=modes, value=current_mode, label="Mode") - group = gr.CheckboxGroup(choices=extensions, value=shared.args.extensions, label="Available extensions") + shared.gradio['interface_modes_menu'] = gr.Dropdown(choices=modes, value=current_mode, label="Mode") + shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=get_available_extensions(), value=shared.args.extensions, label="Available extensions") + shared.gradio['cmd_arguments_menu'] = gr.CheckboxGroup(choices=cmd_list, value=active_cmd_list, label="Boolean command-line flags") + shared.gradio['reset_interface'] = gr.Button("Apply and restart the interface", type="primary") - with gr.Box(): - stream = gr.Checkbox(label='no-stream', value=shared.args.no_stream) - kill = gr.Button("Apply and restart the interface") - kill.click(set_interface_mode, [modes_menu, group, stream], None) - kill.click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'

Reloading...\'; setTimeout(function(){location.reload()},2500)}') + shared.gradio['reset_interface'].click(set_interface_arguments, [shared.gradio[k] for k in ['interface_modes_menu', 'extensions_menu', 'cmd_arguments_menu']], None) + shared.gradio['reset_interface'].click(lambda : None, None, None, _js='() => {document.body.innerHTML=\'Reloading...
\'; setTimeout(function(){location.reload()},2500)}') if shared.args.extensions is not None: extensions_module.create_extensions_block() From dd1c5963da30c862faa5564e423195e98e7e460e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 12:45:27 -0300 Subject: [PATCH 46/89] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 992d96ee..4641647c 100644 --- a/README.md +++ b/README.md @@ -193,7 +193,7 @@ Before reporting a bug, make sure that you have: ## Credits -- Gradio dropdown menu refresh button: https://github.com/AUTOMATIC1111/stable-diffusion-webui +- Gradio dropdown menu refresh button, code for reloading the interface: https://github.com/AUTOMATIC1111/stable-diffusion-webui - Verbose preset: Anonymous 4chan user. - NovelAI and KoboldAI presets: https://github.com/KoboldAI/KoboldAI-Client/wiki/Settings-Presets - Pygmalion preset, code for early stopping in chat mode, code for some of the sliders, --chat mode colors: https://github.com/PygmalionAI/gradio-ui/ From e085cb4333617749d291c27ecefdb29d0e78e342 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 13:34:23 -0300 Subject: [PATCH 47/89] Small changes --- modules/models.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/modules/models.py b/modules/models.py index ea5fe757..23eab64f 100644 --- a/modules/models.py +++ b/modules/models.py @@ -7,8 +7,9 @@ from pathlib import Path import numpy as np import torch import transformers -from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig -from accelerate import infer_auto_device_map, init_empty_weights, load_checkpoint_and_dispatch +from accelerate import infer_auto_device_map, init_empty_weights +from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig) import modules.shared as shared @@ -113,23 +114,20 @@ def load_model(model_name): if shared.args.gpu_memory: memory_map = shared.args.gpu_memory - max_memory = { 0: f'{memory_map[0]}GiB' } - for i in range(1, len(memory_map)): + max_memory = {} + for i in range(len(memory_map)): max_memory[i] = f'{memory_map[i]}GiB' max_memory['cpu'] = f'{shared.args.cpu_memory or 99}GiB' params['max_memory'] = max_memory else: - total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024)) - suggestion = round((total_mem - 1000) / 1000) * 1000 + total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024*1024)) + suggestion = round((total_mem-1000) / 1000) * 1000 if total_mem - suggestion < 800: suggestion -= 1000 suggestion = int(round(suggestion/1000)) print(f"\033[1;32;1mAuto-assiging --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors.\nYou can manually set other values.\033[0;37;0m") - max_memory = { - 0: f'{suggestion}GiB', - 'cpu': f'{shared.args.cpu_memory or 99}GiB' - } + max_memory = {0: f'{suggestion}GiB', 'cpu': f'{shared.args.cpu_memory or 99}GiB'} params['max_memory'] = max_memory if shared.args.disk: From ee164d1821c73f4cc7359d05a68cde5d624766f1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 18:22:16 -0300 Subject: [PATCH 48/89] Don't split the layers in 8-bit mode by default --- modules/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/models.py b/modules/models.py index 
23eab64f..63060d43 100644 --- a/modules/models.py +++ b/modules/models.py @@ -105,8 +105,10 @@ def load_model(model_name): params["torch_dtype"] = torch.float32 else: params["device_map"] = 'auto' - if shared.args.load_in_8bit: + if shared.args.load_in_8bit and any((shared.args.auto_devices, shared.args.gpu_memory)): params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True) + elif shared.args.load_in_8bit: + params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True) elif shared.args.bf16: params["torch_dtype"] = torch.bfloat16 else: @@ -119,7 +121,7 @@ def load_model(model_name): max_memory[i] = f'{memory_map[i]}GiB' max_memory['cpu'] = f'{shared.args.cpu_memory or 99}GiB' params['max_memory'] = max_memory - else: + elif shared.args.auto_devices: total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024*1024)) suggestion = round((total_mem-1000) / 1000) * 1000 if total_mem - suggestion < 800: From 104293f411cd517babf19ecb7d80031b9e6df5f6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 21:31:39 -0300 Subject: [PATCH 49/89] Add LoRA support --- css/main.css | 11 ++++++++++- download-model.py | 17 +++++++++++------ modules/models.py | 2 ++ modules/shared.py | 3 ++- requirements.txt | 1 + server.py | 25 +++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 8 deletions(-) diff --git a/css/main.css b/css/main.css index f5ccfe94..87c3bded 100644 --- a/css/main.css +++ b/css/main.css @@ -1,12 +1,15 @@ .tabs.svelte-710i53 { margin-top: 0 } + .py-6 { padding-top: 2.5rem } + .dark #refresh-button { background-color: #ffffff1f; } + #refresh-button { flex: none; margin: 0; @@ -17,22 +20,28 @@ border-radius: 10px; background-color: #0000000d; } + #download-label, #upload-label { min-height: 0 } + #accordion { } + .dark svg { fill: white; } + svg { display: unset !important; vertical-align: middle !important; margin: 5px; } + ol li p, ul li p { display: inline-block; } -#main, #parameters, #chat-settings, #interface-mode { + +#main, #parameters, #chat-settings, #interface-mode, #lora { border: 0; } diff --git a/download-model.py b/download-model.py index 8be398c4..808b9fc2 100644 --- a/download-model.py +++ b/download-model.py @@ -101,6 +101,7 @@ def get_download_links_from_huggingface(model, branch): classifications = [] has_pytorch = False has_safetensors = False + is_lora = False while True: content = requests.get(f"{base}{page}{cursor.decode()}").content @@ -110,8 +111,10 @@ def get_download_links_from_huggingface(model, branch): for i in range(len(dict)): fname = dict[i]['path'] + if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')): + is_lora = True - is_pytorch = re.match("pytorch_model.*\.bin", fname) + is_pytorch = re.match("(pytorch|adapter)_model.*\.bin", fname) is_safetensors = re.match("model.*\.safetensors", fname) is_tokenizer = re.match("tokenizer.*\.model", fname) is_text = re.match(".*\.(txt|json)", fname) or is_tokenizer @@ -130,6 +133,7 @@ def get_download_links_from_huggingface(model, branch): has_pytorch = True classifications.append('pytorch') + cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50' cursor = base64.b64encode(cursor) cursor = cursor.replace(b'=', b'%3D') @@ -140,7 +144,7 @@ def get_download_links_from_huggingface(model, branch): if classifications[i] == 'pytorch': links.pop(i) - return links + return links, is_lora if __name__ == '__main__': model = args.MODEL @@ 
-159,15 +163,16 @@ if __name__ == '__main__': except ValueError as err_branch: print(f"Error: {err_branch}") sys.exit() + + links, is_lora = get_download_links_from_huggingface(model, branch) + base_folder = 'models' if not is_lora else 'loras' if branch != 'main': - output_folder = Path("models") / (model.split('/')[-1] + f'_{branch}') + output_folder = Path(base_folder) / (model.split('/')[-1] + f'_{branch}') else: - output_folder = Path("models") / model.split('/')[-1] + output_folder = Path(base_folder) / model.split('/')[-1] if not output_folder.exists(): output_folder.mkdir() - links = get_download_links_from_huggingface(model, branch) - # Downloading the files print(f"Downloading the model to {output_folder}") pool = multiprocessing.Pool(processes=args.threads) diff --git a/modules/models.py b/modules/models.py index 63060d43..6df67d3c 100644 --- a/modules/models.py +++ b/modules/models.py @@ -11,6 +11,8 @@ from accelerate import infer_auto_device_map, init_empty_weights from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig) +from peft import PeftModel + import modules.shared as shared transformers.logging.set_verbosity_error() diff --git a/modules/shared.py b/modules/shared.py index da5efbd3..908455e1 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -2,7 +2,8 @@ import argparse model = None tokenizer = None -model_name = "" +model_name = "None" +lora_name = "None" soft_prompt_tensor = None soft_prompt = False is_RWKV = False diff --git a/requirements.txt b/requirements.txt index b9a9b385..fcf000a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ flexgen==0.1.7 gradio==3.18.0 markdown numpy +peft==0.2.0 requests rwkv==0.4.2 safetensors==0.3.0 diff --git a/server.py b/server.py index 2024fd42..dd35d9aa 100644 --- a/server.py +++ b/server.py @@ -17,6 +17,7 @@ import modules.ui as ui from modules.html_generator import generate_chat_html from modules.models import load_model, load_soft_prompt from modules.text_generation import generate_reply +from modules.LoRA import add_lora_to_model # Loading custom settings settings_file = None @@ -48,6 +49,9 @@ def get_available_extensions(): def get_available_softprompts(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) +def get_available_loras(): + return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + def load_model_wrapper(selected_model): if selected_model != shared.model_name: shared.model_name = selected_model @@ -59,6 +63,13 @@ def load_model_wrapper(selected_model): return selected_model +def load_lora_wrapper(selected_lora): + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + add_lora_to_model(selected_lora) + return selected_lora + def load_preset_values(preset_menu, return_dict=False): generate_params = { 'do_sample': True, @@ -181,6 +192,7 @@ available_models = get_available_models() available_presets = get_available_presets() available_characters = get_available_characters() available_softprompts = get_available_softprompts() +available_loras = get_available_loras() # Default extensions extensions_module.available_extensions = get_available_extensions() @@ -401,6 +413,19 @@ def create_interface(): shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + with gr.Tab("LoRA", 
elem_id="lora"): + with gr.Row(): + with gr.Column(): + gr.Markdown("Load") + with gr.Row(): + shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') + ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') + with gr.Column(): + gr.Markdown("Train (TODO)") + gr.Button("Practice your button clicking skills") + + shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu']], show_progress=True) + with gr.Tab("Interface mode", elem_id="interface-mode"): modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" From 0cecfc684c6f5fa2531980d856c5ea56bc6f97ee Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 21:35:53 -0300 Subject: [PATCH 50/89] Add files --- loras/place-your-loras-here.txt | 0 modules/LoRA.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 loras/place-your-loras-here.txt create mode 100644 modules/LoRA.py diff --git a/loras/place-your-loras-here.txt b/loras/place-your-loras-here.txt new file mode 100644 index 00000000..e69de29b diff --git a/modules/LoRA.py b/modules/LoRA.py new file mode 100644 index 00000000..84e128fb --- /dev/null +++ b/modules/LoRA.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from peft import PeftModel + +import modules.shared as shared +from modules.models import load_model + + +def add_lora_to_model(lora_name): + + # Is there a more efficient way of returning to the base model? + if lora_name == "None": + shared.model, shared.tokenizer = load_model(shared.model_name) + else: + shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}")) From 53b6a66beb7520eb332642907eb6f82df28581ca Mon Sep 17 00:00:00 2001 From: askmyteapot <62238146+askmyteapot@users.noreply.github.com> Date: Fri, 17 Mar 2023 18:34:13 +1000 Subject: [PATCH 51/89] Update GPTQ_Loader.py Correcting decoder layer for renamed class. 
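The renamed string matters because it is passed to accelerate as a "do not split" hint when the quantized model is spread across devices; if it no longer matches the current transformers class name, a single decoder block can end up straddling two GPUs. A minimal sketch of that pattern is shown below (illustrative only; the helper name, GPU index, and memory limits are placeholders, not values taken from this patch series):

```python
# Sketch: how the renamed class feeds accelerate's device-map inference.
# dispatch_quantized(), the GPU index, and the GiB limits are illustrative.
import accelerate

def dispatch_quantized(model, gpu_memory="10GiB", cpu_memory="99GiB"):
    max_memory = {0: gpu_memory, "cpu": cpu_memory}
    # The entry must match the transformers class name, LlamaDecoderLayer,
    # so that no individual decoder block is split across devices.
    device_map = accelerate.infer_auto_device_map(
        model,
        max_memory=max_memory,
        no_split_module_classes=["LlamaDecoderLayer"],
    )
    return accelerate.dispatch_model(model, device_map=device_map)
```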
--- modules/GPTQ_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index c2723490..662182e7 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -61,7 +61,7 @@ def load_quantized(model_name): max_memory[i] = f"{shared.args.gpu_memory[i]}GiB" max_memory['cpu'] = f"{shared.args.cpu_memory or '99'}GiB" - device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory, no_split_module_classes=["LLaMADecoderLayer"]) + device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory, no_split_module_classes=["LlamaDecoderLayer"]) model = accelerate.dispatch_model(model, device_map=device_map) # Single GPU From 214dc6868ecb07c0c7c974a2b2afa48ce766e8ce Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:24:52 -0300 Subject: [PATCH 52/89] Several QoL changes related to LoRA --- modules/shared.py | 5 +++ server.py | 8 ++++- settings-template.json | 69 ++++++++++++++++++++++-------------------- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 908455e1..9d4484c4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -53,6 +53,10 @@ settings = { '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n', '(rosey|chip|joi)_.*_instruct.*': 'User: \n', 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>' + }, + 'lora_prompts': { + 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', + 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" } } @@ -68,6 +72,7 @@ def str2bool(v): parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54)) parser.add_argument('--model', type=str, help='Name of the model to load by default.') +parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.') parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.') parser.add_argument('--chat', action='store_true', help='Launch the web UI in chat mode.') parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. 
Similarly, img_me.png or img_me.jpg will be used as your profile picture.') diff --git a/server.py b/server.py index dd35d9aa..8dacc132 100644 --- a/server.py +++ b/server.py @@ -225,10 +225,16 @@ else: print() shared.model_name = available_models[i] shared.model, shared.tokenizer = load_model(shared.model_name) +if shared.args.lora: + shared.lora_name = shared.args.lora + print(f"Adding the LoRA {shared.lora_name} to the model...") + add_lora_to_model(shared.lora_name) # Default UI settings default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] -default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] +default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] +if default_text == '': + default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' diff --git a/settings-template.json b/settings-template.json index 9da43970..3d1129ad 100644 --- a/settings-template.json +++ b/settings-template.json @@ -1,35 +1,38 @@ { - "max_new_tokens": 200, - "max_new_tokens_min": 1, - "max_new_tokens_max": 2000, - "name1": "Person 1", - "name2": "Person 2", - "context": "This is a conversation between two people.", - "stop_at_newline": true, - "chat_prompt_size": 2048, - "chat_prompt_size_min": 0, - "chat_prompt_size_max": 2048, - "chat_generation_attempts": 1, - "chat_generation_attempts_min": 1, - "chat_generation_attempts_max": 5, - "name1_pygmalion": "You", - "name2_pygmalion": "Kawaii", - "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - "stop_at_newline_pygmalion": false, - "default_extensions": [], - "chat_default_extensions": [ - "gallery" - ], - "presets": { - "default": "NovelAI-Sphinx Moth", - "pygmalion-*": "Pygmalion", - "RWKV-*": "Naive", - "(rosey|chip|joi)_.*_instruct.*": "Instruct Joi (Contrastive Search)" - }, - "prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", - "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" - } + "max_new_tokens": 200, + "max_new_tokens_min": 1, + "max_new_tokens_max": 2000, + "name1": "Person 1", + "name2": "Person 2", + "context": "This is a conversation between two people.", + "stop_at_newline": true, + "chat_prompt_size": 2048, + "chat_prompt_size_min": 0, + "chat_prompt_size_max": 2048, + "chat_generation_attempts": 1, + "chat_generation_attempts_min": 1, + "chat_generation_attempts_max": 5, + "name1_pygmalion": "You", + "name2_pygmalion": "Kawaii", + "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. 
She is an optimist who loves to spread happiness and positivity wherever she goes.\n", + "stop_at_newline_pygmalion": false, + "default_extensions": [], + "chat_default_extensions": [ + "gallery" + ], + "presets": { + "default": "NovelAI-Sphinx Moth", + "pygmalion-*": "Pygmalion", + "RWKV-*": "Naive" + }, + "prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", + "(rosey|chip|joi)_.*_instruct.*": "User: \n", + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + }, + "lora_prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" + } } From 29fe7b1c74c9dc583c60c9865bb93854a04a8e4c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:39:48 -0300 Subject: [PATCH 53/89] Remove LoRA tab, move it into the Parameters menu --- modules/LoRA.py | 2 ++ modules/shared.py | 2 +- server.py | 26 +++++++++++--------------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 84e128fb..c95da6ee 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -10,6 +10,8 @@ def add_lora_to_model(lora_name): # Is there a more efficient way of returning to the base model? if lora_name == "None": + print(f"Reloading the model to remove the LoRA...") shared.model, shared.tokenizer = load_model(shared.model_name) else: + print(f"Adding the LoRA {lora_name} to the model...") shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}")) diff --git a/modules/shared.py b/modules/shared.py index 9d4484c4..488a1e96 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,7 +56,7 @@ settings = { }, 'lora_prompts': { 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" + 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. 
\nMention the word \"large language models\" in that poem.\n### Response:\n" } } diff --git a/server.py b/server.py index 8dacc132..7d5ecc74 100644 --- a/server.py +++ b/server.py @@ -64,11 +64,15 @@ def load_model_wrapper(selected_model): return selected_model def load_lora_wrapper(selected_lora): + shared.lora_name = selected_lora + default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] + if not shared.args.cpu: gc.collect() torch.cuda.empty_cache() add_lora_to_model(selected_lora) - return selected_lora + + return selected_lora, default_text def load_preset_values(preset_menu, return_dict=False): generate_params = { @@ -156,6 +160,10 @@ def create_settings_menus(default_preset): shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + with gr.Row(): + shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') + ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') + with gr.Accordion('Soft prompt', open=False): with gr.Row(): shared.gradio['softprompts_menu'] = gr.Dropdown(choices=available_softprompts, value='None', label='Soft prompt') @@ -167,6 +175,7 @@ def create_settings_menus(default_preset): shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio['do_sample'], shared.gradio['temperature'], shared.gradio['top_p'], shared.gradio['typical_p'], shared.gradio['repetition_penalty'], shared.gradio['encoder_repetition_penalty'], shared.gradio['top_k'], shared.gradio['min_length'], shared.gradio['no_repeat_ngram_size'], shared.gradio['num_beams'], shared.gradio['penalty_alpha'], shared.gradio['length_penalty'], shared.gradio['early_stopping']]) + shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -226,8 +235,8 @@ else: shared.model_name = available_models[i] shared.model, shared.tokenizer = load_model(shared.model_name) if shared.args.lora: + print(shared.args.lora) shared.lora_name = shared.args.lora - print(f"Adding the LoRA {shared.lora_name} to the model...") add_lora_to_model(shared.lora_name) # Default UI settings @@ -419,19 +428,6 @@ def create_interface(): shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") - with gr.Tab("LoRA", elem_id="lora"): - with gr.Row(): - with gr.Column(): - gr.Markdown("Load") - with gr.Row(): - shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') - ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') - with gr.Column(): - gr.Markdown("Train (TODO)") - 
gr.Button("Practice your button clicking skills") - - shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu']], show_progress=True) - with gr.Tab("Interface mode", elem_id="interface-mode"): modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" From 7d97287e691edf48012f193828562a42f0b41674 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:41:12 -0300 Subject: [PATCH 54/89] Update settings-template.json --- settings-template.json | 72 +++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/settings-template.json b/settings-template.json index 3d1129ad..df7403d6 100644 --- a/settings-template.json +++ b/settings-template.json @@ -1,38 +1,38 @@ { - "max_new_tokens": 200, - "max_new_tokens_min": 1, - "max_new_tokens_max": 2000, - "name1": "Person 1", - "name2": "Person 2", - "context": "This is a conversation between two people.", - "stop_at_newline": true, - "chat_prompt_size": 2048, - "chat_prompt_size_min": 0, - "chat_prompt_size_max": 2048, - "chat_generation_attempts": 1, - "chat_generation_attempts_min": 1, - "chat_generation_attempts_max": 5, - "name1_pygmalion": "You", - "name2_pygmalion": "Kawaii", - "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - "stop_at_newline_pygmalion": false, - "default_extensions": [], - "chat_default_extensions": [ - "gallery" - ], - "presets": { - "default": "NovelAI-Sphinx Moth", - "pygmalion-*": "Pygmalion", - "RWKV-*": "Naive" - }, - "prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", - "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" - }, - "lora_prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" - } + "max_new_tokens": 200, + "max_new_tokens_min": 1, + "max_new_tokens_max": 2000, + "name1": "Person 1", + "name2": "Person 2", + "context": "This is a conversation between two people.", + "stop_at_newline": true, + "chat_prompt_size": 2048, + "chat_prompt_size_min": 0, + "chat_prompt_size_max": 2048, + "chat_generation_attempts": 1, + "chat_generation_attempts_min": 1, + "chat_generation_attempts_max": 5, + "name1_pygmalion": "You", + "name2_pygmalion": "Kawaii", + "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. 
She is an optimist who loves to spread happiness and positivity wherever she goes.\n", + "stop_at_newline_pygmalion": false, + "default_extensions": [], + "chat_default_extensions": [ + "gallery" + ], + "presets": { + "default": "NovelAI-Sphinx Moth", + "pygmalion-*": "Pygmalion", + "RWKV-*": "Naive" + }, + "prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", + "(rosey|chip|joi)_.*_instruct.*": "User: \n", + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + }, + "lora_prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + } } From a717fd709d4ef5ab1a5bf97b9e59593ea7e36569 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:42:25 -0300 Subject: [PATCH 55/89] Sort the imports --- modules/callbacks.py | 1 + modules/chat.py | 3 ++- modules/models.py | 3 +-- server.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/callbacks.py b/modules/callbacks.py index faa4a5e9..12a90cc3 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -7,6 +7,7 @@ import transformers import modules.shared as shared + # Copied from https://github.com/PygmalionAI/gradio-ui/ class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria): diff --git a/modules/chat.py b/modules/chat.py index d7202bee..3f313db2 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -12,7 +12,8 @@ import modules.extensions as extensions_module import modules.shared as shared from modules.extensions import apply_extensions from modules.html_generator import generate_chat_html -from modules.text_generation import encode, generate_reply, get_max_prompt_length +from modules.text_generation import (encode, generate_reply, + get_max_prompt_length) # This gets the new line characters right. 
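Patches 52–53 above key the default prompt on the LoRA name through regular expressions, the same way presets and prompts are keyed on the model name. A small self-contained sketch of that lookup; the `pick_prompt` helper and the sample names are invented for illustration, and the prompt strings are abbreviated:

```
import re

lora_prompts = {
    'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:',
    'alpaca-lora-7b': 'Below is an instruction that describes a task. [...]',
}

def pick_prompt(prompts, name):
    # The first key (treated as a regex) that matches the name wins; otherwise 'default'.
    key = next((k for k in prompts if re.match(k.lower(), name.lower())), 'default')
    return prompts[key]

print(pick_prompt(lora_prompts, 'alpaca-lora-7b'))  # matches the specific key
print(pick_prompt(lora_prompts, 'my-custom-lora'))  # no key matches -> 'default'
```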
diff --git a/modules/models.py b/modules/models.py index 6df67d3c..e4507e57 100644 --- a/modules/models.py +++ b/modules/models.py @@ -8,11 +8,10 @@ import numpy as np import torch import transformers from accelerate import infer_auto_device_map, init_empty_weights +from peft import PeftModel from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig) -from peft import PeftModel - import modules.shared as shared transformers.logging.set_verbosity_error() diff --git a/server.py b/server.py index 7d5ecc74..5c21f4cd 100644 --- a/server.py +++ b/server.py @@ -15,9 +15,9 @@ import modules.extensions as extensions_module import modules.shared as shared import modules.ui as ui from modules.html_generator import generate_chat_html +from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt from modules.text_generation import generate_reply -from modules.LoRA import add_lora_to_model # Loading custom settings settings_file = None From 614dad007530574e6c4680362d0497c20a9da07d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:43:11 -0300 Subject: [PATCH 56/89] Remove unused import --- modules/LoRA.py | 2 +- modules/models.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index c95da6ee..74030c25 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -10,7 +10,7 @@ def add_lora_to_model(lora_name): # Is there a more efficient way of returning to the base model? if lora_name == "None": - print(f"Reloading the model to remove the LoRA...") + print("Reloading the model to remove the LoRA...") shared.model, shared.tokenizer = load_model(shared.model_name) else: print(f"Adding the LoRA {lora_name} to the model...") diff --git a/modules/models.py b/modules/models.py index e4507e57..63060d43 100644 --- a/modules/models.py +++ b/modules/models.py @@ -8,7 +8,6 @@ import numpy as np import torch import transformers from accelerate import infer_auto_device_map, init_empty_weights -from peft import PeftModel from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig) From cdfa787bcb29b038320358e1c8ee9cd05ee8a301 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:53:28 -0300 Subject: [PATCH 57/89] Update README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 4641647c..ba0a2558 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * [DeepSpeed ZeRO-3 offload](https://github.com/oobabooga/text-generation-webui/wiki/DeepSpeed). * Get responses via API, [with](https://github.com/oobabooga/text-generation-webui/blob/main/api-example-streaming.py) or [without](https://github.com/oobabooga/text-generation-webui/blob/main/api-example.py) streaming. * [Supports the LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model). +* [Supports LoRAs](https://github.com/oobabooga/text-generation-webui/wiki/Using-LoRAs). * [Supports the RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model). * Supports softprompts. * [Supports extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). 
@@ -138,6 +139,7 @@ Optionally, you can use the following command-line flags: |-------------|-------------| | `-h`, `--help` | show this help message and exit | | `--model MODEL` | Name of the model to load by default. | +| `--lora LORA` | Name of the LoRA to apply to the model by default. | | `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. | | `--chat` | Launch the web UI in chat mode.| | `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. | From ebef4a510b10bc2df9c891202446355ac5d8014a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:58:45 -0300 Subject: [PATCH 58/89] Update README --- README.md | 68 +++++++++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index ba0a2558..269845ba 100644 --- a/README.md +++ b/README.md @@ -27,11 +27,11 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * [FlexGen offload](https://github.com/oobabooga/text-generation-webui/wiki/FlexGen). * [DeepSpeed ZeRO-3 offload](https://github.com/oobabooga/text-generation-webui/wiki/DeepSpeed). * Get responses via API, [with](https://github.com/oobabooga/text-generation-webui/blob/main/api-example-streaming.py) or [without](https://github.com/oobabooga/text-generation-webui/blob/main/api-example.py) streaming. -* [Supports the LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model). -* [Supports LoRAs](https://github.com/oobabooga/text-generation-webui/wiki/Using-LoRAs). -* [Supports the RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model). -* Supports softprompts. -* [Supports extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). +* [LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model). +* [LoRAs](https://github.com/oobabooga/text-generation-webui/wiki/Using-LoRAs). +* [RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model). +* Softprompts. +* [Extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). * [Works on Google Colab](https://github.com/oobabooga/text-generation-webui/wiki/Running-on-Colab). ## Installation option 1: conda @@ -135,42 +135,42 @@ Then browse to Optionally, you can use the following command-line flags: -| Flag | Description | -|-------------|-------------| -| `-h`, `--help` | show this help message and exit | -| `--model MODEL` | Name of the model to load by default. | -| `--lora LORA` | Name of the LoRA to apply to the model by default. | -| `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. | -| `--chat` | Launch the web UI in chat mode.| -| `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. 
| -| `--cpu` | Use the CPU to generate text.| -| `--load-in-8bit` | Load the model with 8-bit precision.| -| `--load-in-4bit` | DEPRECATED: use `--gptq-bits 4` instead. | -| `--gptq-bits GPTQ_BITS` | Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA and OPT. | -| `--gptq-model-type MODEL_TYPE` | Model type of pre-quantized model. Currently only LLaMa and OPT are supported. | -| `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | +| Flag | Description | +|------------------|-------------| +| `-h`, `--help` | show this help message and exit | +| `--model MODEL` | Name of the model to load by default. | +| `--lora LORA` | Name of the LoRA to apply to the model by default. | +| `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. | +| `--chat` | Launch the web UI in chat mode.| +| `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. | +| `--cpu` | Use the CPU to generate text.| +| `--load-in-8bit` | Load the model with 8-bit precision.| +| `--load-in-4bit` | DEPRECATED: use `--gptq-bits 4` instead. | +| `--gptq-bits GPTQ_BITS` | Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA and OPT. | +| `--gptq-model-type MODEL_TYPE` | Model type of pre-quantized model. Currently only LLaMa and OPT are supported. | +| `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | | `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.| -| `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. | +| `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. | | `--disk-cache-dir DISK_CACHE_DIR` | Directory to save the disk cache to. Defaults to `cache/`. | | `--gpu-memory GPU_MEMORY [GPU_MEMORY ...]` | Maxmimum GPU memory in GiB to be allocated per GPU. Example: `--gpu-memory 10` for a single GPU, `--gpu-memory 10 5` for two GPUs. | -| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.| -| `--flexgen` | Enable the use of FlexGen offloading. | -| `--percent PERCENT [PERCENT ...]` | FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0). | -| `--compress-weight` | FlexGen: Whether to compress weight (default: False).| -| `--pin-weight [PIN_WEIGHT]` | FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%). | +| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.| +| `--flexgen` | Enable the use of FlexGen offloading. | +| `--percent PERCENT [PERCENT ...]` | FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0). | +| `--compress-weight` | FlexGen: Whether to compress weight (default: False).| +| `--pin-weight [PIN_WEIGHT]` | FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%). 
| | `--deepspeed` | Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. | -| `--nvme-offload-dir NVME_OFFLOAD_DIR` | DeepSpeed: Directory to use for ZeRO-3 NVME offloading. | -| `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. | -| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". | -| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | -| `--no-stream` | Don't stream the text output in real time. | +| `--nvme-offload-dir NVME_OFFLOAD_DIR` | DeepSpeed: Directory to use for ZeRO-3 NVME offloading. | +| `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. | +| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". | +| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | +| `--no-stream` | Don't stream the text output in real time. | | `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag.| | `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. | -| `--listen` | Make the web UI reachable from your local network.| +| `--listen` | Make the web UI reachable from your local network.| | `--listen-port LISTEN_PORT` | The listening port that the server will use. | -| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | -| `--auto-launch` | Open the web UI in the default browser upon launch. | -| `--verbose` | Print the prompts to the terminal. | +| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | +| `--auto-launch` | Open the web UI in the default browser upon launch. | +| `--verbose` | Print the prompts to the terminal. | Out of memory errors? [Check this guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide). From f0b26451b49e5f89f93ee572e39a92d8c7b56dc3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:07:17 -0300 Subject: [PATCH 59/89] Add a comment --- modules/LoRA.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/LoRA.py b/modules/LoRA.py index 74030c25..fe11c0da 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -13,5 +13,6 @@ def add_lora_to_model(lora_name): print("Reloading the model to remove the LoRA...") shared.model, shared.tokenizer = load_model(shared.model_name) else: + # Why doesn't this work in 16-bit mode? 
print(f"Adding the LoRA {lora_name} to the model...") shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}")) From 9ed2c4501ca31381b38f3bbe490c8d5d6d746229 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 16:06:11 -0300 Subject: [PATCH 60/89] Use markdown in the "HTML" tab --- modules/chat.py | 13 +++---------- modules/html_generator.py | 28 ++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 3f313db2..36265990 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -11,18 +11,11 @@ from PIL import Image import modules.extensions as extensions_module import modules.shared as shared from modules.extensions import apply_extensions -from modules.html_generator import generate_chat_html +from modules.html_generator import fix_newlines, generate_chat_html from modules.text_generation import (encode, generate_reply, get_max_prompt_length) -# This gets the new line characters right. -def clean_chat_message(text): - text = text.replace('\n', '\n\n') - text = re.sub(r"\n{3,}", "\n\n", text) - text = text.strip() - return text - def generate_chat_output(history, name1, name2, character): if shared.args.cai_chat: return generate_chat_html(history, name1, name2, character) @@ -30,7 +23,7 @@ def generate_chat_output(history, name1, name2, character): return history def generate_chat_prompt(user_input, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=False): - user_input = clean_chat_message(user_input) + user_input = fix_newlines(user_input) rows = [f"{context.strip()}\n"] if shared.soft_prompt: @@ -83,7 +76,7 @@ def extract_message_from_reply(question, reply, name1, name2, check, impersonate if idx != -1: reply = reply[:idx] next_character_found = True - reply = clean_chat_message(reply) + reply = fix_newlines(reply) # If something like "\nYo" is generated just before "\nYou:" # is completed, trim it diff --git a/modules/html_generator.py b/modules/html_generator.py index 9942e6c9..940d5486 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -1,6 +1,6 @@ ''' -This is a library for formatting GPT-4chan and chat outputs as nice HTML. +This is a library for formatting text outputs as nice HTML. ''' @@ -21,10 +21,26 @@ with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') with open(Path(__file__).resolve().parent / '../css/html_cai_style.css', 'r') as f: cai_css = f.read() -def generate_basic_html(s): - s = '\n'.join([f'

{line}

' for line in s.split('\n')]) - s = f'
{s}
' - return s +def fix_newlines(string): + string = string.replace('\n', '\n\n') + string = re.sub(r"\n{3,}", "\n\n", string) + string = string.strip() + return string + +# This could probably be generalized and improved +def convert_to_markdown(string): + string = string.replace('\\begin{code}', '```') + string = string.replace('\\end{code}', '```') + string = string.replace('\\begin{blockquote}', '> ') + string = string.replace('\\end{blockquote}', '') + string = re.sub(r"(.)```", r"\1\n```", string) +# string = fix_newlines(string) + return markdown.markdown(string, extensions=['fenced_code']) + +def generate_basic_html(string): + string = convert_to_markdown(string) + string = f'
{string}
' + return string def process_post(post, c): t = post.split('\n') @@ -108,7 +124,7 @@ def generate_chat_html(history, name1, name2, character): img_me = load_html_image(["img_me.png", "img_me.jpg", "img_me.jpeg"]) for i,_row in enumerate(history[::-1]): - row = [markdown.markdown(re.sub(r"(.)```", r"\1\n```", entry), extensions=['fenced_code']) for entry in _row] + row = [convert_to_markdown(entry) for entry in _row] output += f"""
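To make the conversion introduced in this patch concrete, here is a short self-contained illustration of `convert_to_markdown`; the function body is copied from the patch, while the sample string is invented:

```
import re
import markdown

def convert_to_markdown(string):
    # LaTeX-style code markers become Markdown fences...
    string = string.replace('\\begin{code}', '```')
    string = string.replace('\\end{code}', '```')
    string = string.replace('\\begin{blockquote}', '> ')
    string = string.replace('\\end{blockquote}', '')
    # ...and a fence glued to the end of a sentence is pushed onto its own line.
    string = re.sub(r"(.)```", r"\1\n```", string)
    return markdown.markdown(string, extensions=['fenced_code'])

sample = "Here is an example:\\begin{code}\nprint('hello')\n\\end{code}"
print(convert_to_markdown(sample))
# Expected: a <p> paragraph followed by a <pre><code> block for the snippet.
```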
From 9256e937d6e7d34c539b99bcb35183d9cf6fe157 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 17:45:28 -0300 Subject: [PATCH 61/89] Add some LoRA params --- modules/LoRA.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index fe11c0da..b568e57b 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -15,4 +15,8 @@ def add_lora_to_model(lora_name): else: # Why doesn't this work in 16-bit mode? print(f"Adding the LoRA {lora_name} to the model...") - shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}")) + + params = {} + #params['device_map'] = {'': 0} + #params['dtype'] = shared.model.dtype + shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}"), **params) From 4426f941e07cb64a1f4754978517c56269d9434e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 18:51:07 -0300 Subject: [PATCH 62/89] Update the installation instructions. Tldr use WSL --- README.md | 54 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 269845ba..ca30d686 100644 --- a/README.md +++ b/README.md @@ -34,39 +34,45 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * [Extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). * [Works on Google Colab](https://github.com/oobabooga/text-generation-webui/wiki/Running-on-Colab). -## Installation option 1: conda +## Installation -Open a terminal and copy and paste these commands one at a time ([install conda](https://docs.conda.io/en/latest/miniconda.html) first if you don't have it already): +The recommended installation methods are the following: + +* Linux and macOS: using conda natively. +* Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). + +Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html + +#### 1. Create a new conda environment ``` -conda create -n textgen +conda create -n textgen python=3.10.9 conda activate textgen -conda install torchvision=0.14.1 torchaudio=0.13.1 pytorch-cuda=11.7 git -c pytorch -c nvidia +``` + +#### 2. Install Pytorch + +| System | GPU | Command | +|--------|---------|---------| +| Linux/WSL | NVIDIA | `conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia` | +| Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2` | +| MacOS + MPS (untested) | Any | `conda install pytorch torchvision torchaudio -c pytorch` | + +The up to date commands can be found here: https://pytorch.org/get-started/locally/ + +#### 3. Install the web UI + +``` git clone https://github.com/oobabooga/text-generation-webui cd text-generation-webui pip install -r requirements.txt ``` -The third line assumes that you have an NVIDIA GPU. +### Alternative: native Windows installation -* If you have an AMD GPU, replace the third command with this one: +As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). 
-``` -pip3 install torch torchvision=0.14.1 torchaudio=0.13.1 --extra-index-url https://download.pytorch.org/whl/rocm5.2 -``` - -* If you are running it in CPU mode, replace the third command with this one: - -``` -conda install pytorch torchvision=0.14.1 torchaudio=0.13.1 git -c pytorch -``` - -> **Note** -> 1. If you are on Windows, it may be easier to run the commands above in a WSL environment. The performance may also be better. A full guide can be found here: [Windows Subsystem for Linux (Ubuntu) Installation Guide -](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide). -> 2. For a more detailed, user-contributed guide, see: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). - -## Installation option 2: one-click installers +### Alternative: one click installers [oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) @@ -77,6 +83,10 @@ Just download the zip above, extract it, and double click on "install". The web * To download a model, double click on "download-model" * To start the web UI, double click on "start-webui" +Source codes: https://github.com/oobabooga/one-click-installers + +This method lags behind the newest developments and does not support 8-bit mode on Windows without additional set up. + ## Downloading models Models should be placed under `models/model-name`. For instance, `models/gpt-j-6B` for [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main). From ad7c82995395eee730c72842f76f72a4d00b82e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 18:55:01 -0300 Subject: [PATCH 63/89] Update README.md --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ca30d686..04ac456f 100644 --- a/README.md +++ b/README.md @@ -89,17 +89,15 @@ This method lags behind the newest developments and does not support 8-bit mode ## Downloading models -Models should be placed under `models/model-name`. For instance, `models/gpt-j-6B` for [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main). - -#### Hugging Face +Models should be placed inside the `models` folder. [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) is the main place to download models. These are some noteworthy examples: -* [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main) -* [GPT-Neo](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=eleutherai+%2F+gpt-neo) * [Pythia](https://huggingface.co/models?search=eleutherai/pythia) * [OPT](https://huggingface.co/models?search=facebook/opt) * [GALACTICA](https://huggingface.co/models?search=facebook/galactica) +* [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main) +* [GPT-Neo](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=eleutherai+%2F+gpt-neo) * [\*-Erebus](https://huggingface.co/models?search=erebus) (NSFW) * [Pygmalion](https://huggingface.co/models?search=pygmalion) (NSFW) @@ -113,7 +111,7 @@ For instance: If you want to download a model manually, note that all you need are the json, txt, and pytorch\*.bin (or model*.safetensors) files. The remaining files are not necessary. 
-#### GPT-4chan +### GPT-4chan [GPT-4chan](https://huggingface.co/ykilcher/gpt-4chan) has been shut down from Hugging Face, so you need to download it elsewhere. You have two options: From d4f38b6a1ff5d67a5adac7ed262881cfa310c544 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 18:57:48 -0300 Subject: [PATCH 64/89] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 04ac456f..b9ea01cd 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. The recommended installation methods are the following: -* Linux and macOS: using conda natively. +* Linux and MacOS: using conda natively. * Windows: using conda on WSL ([WSL installation guide](https://github.com/oobabooga/text-generation-webui/wiki/Windows-Subsystem-for-Linux-(Ubuntu)-Installation-Guide)). Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html @@ -72,7 +72,7 @@ pip install -r requirements.txt As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). -### Alternative: one click installers +### Alternative: one-click installers [oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) From 9a871117d79149c67146bb5325d4f98fa9a8dc03 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 19:52:22 -0300 Subject: [PATCH 65/89] Update README.md --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index b9ea01cd..19242863 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,19 @@ The recommended installation methods are the following: Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html +On Linux or WSL, it can be installed with these two commands: + +``` +curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh" +bash Miniconda3.sh +``` + +Source: https://educe-ubc.github.io/conda.html + #### 1. Create a new conda environment +Now you need to copy and paste these commands one at a time in your terminal window. Start by creating a new conda environment: + ``` conda create -n textgen python=3.10.9 conda activate textgen From 66e8d123549c8a198fabb836dc50c1ec2340db35 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 19:59:37 -0300 Subject: [PATCH 66/89] Update README.md --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 19242863..0d73c8fa 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * Advanced chat features (send images, get audio responses with TTS). * Stream the text output in real time. * Load parameter presets from text files. 
-* Load large models in 8-bit mode (see [here](https://github.com/oobabooga/text-generation-webui/issues/147#issuecomment-1456040134), [here](https://github.com/oobabooga/text-generation-webui/issues/20#issuecomment-1411650652) and [here](https://www.reddit.com/r/PygmalionAI/comments/1115gom/running_pygmalion_6b_with_8gb_of_vram/) if you are on Windows). +* Load large models in 8-bit mode. * Split large models across your GPU(s), CPU, and disk. * CPU mode. * [FlexGen offload](https://github.com/oobabooga/text-generation-webui/wiki/FlexGen). @@ -54,8 +54,6 @@ Source: https://educe-ubc.github.io/conda.html #### 1. Create a new conda environment -Now you need to copy and paste these commands one at a time in your terminal window. Start by creating a new conda environment: - ``` conda create -n textgen python=3.10.9 conda activate textgen @@ -79,6 +77,8 @@ cd text-generation-webui pip install -r requirements.txt ``` +If you experience bitsandbytes issues on WSL while trying to use `--load-in-8bit`, see this thread: https://github.com/microsoft/WSL/issues/5548#issuecomment-1292858815 + ### Alternative: native Windows installation As an alternative to the recommended WSL method, you can install the web UI natively on Windows using this guide. It will be a lot harder and the performance may be slower: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). @@ -96,7 +96,11 @@ Just download the zip above, extract it, and double click on "install". The web Source codes: https://github.com/oobabooga/one-click-installers -This method lags behind the newest developments and does not support 8-bit mode on Windows without additional set up. +This method lags behind the newest developments and does not support 8-bit mode on Windows without additional set up: https://github.com/oobabooga/text-generation-webui/issues/147#issuecomment-1456040134, https://github.com/oobabooga/text-generation-webui/issues/20#issuecomment-1411650652 + +### Alternative: Docker + +https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87 ## Downloading models From 20f5b455bf9e0c268ff5b4e6e452add0c2aabe78 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 20:19:04 -0300 Subject: [PATCH 67/89] Add parameters reference #386 #331 --- css/main.css | 5 +++++ server.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 87c3bded..c6b0b07e 100644 --- a/css/main.css +++ b/css/main.css @@ -32,6 +32,11 @@ fill: white; } +.dark a { + color: white !important; + text-decoration: none !important; +} + svg { display: unset !important; vertical-align: middle !important; diff --git a/server.py b/server.py index 5c21f4cd..1d324fba 100644 --- a/server.py +++ b/server.py @@ -133,7 +133,7 @@ def create_settings_menus(default_preset): with gr.Row(): with gr.Column(): with gr.Box(): - gr.Markdown('Custom generation parameters') + gr.Markdown('Custom generation parameters ([reference](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig))') with gr.Row(): with gr.Column(): shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') From 0c05e65e5c25b9d63ffa6cd7b8fdf785c546db59 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 
20:25:42 -0300 Subject: [PATCH 68/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d73c8fa..26e5bfcc 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ The recommended installation methods are the following: Conda can be downloaded here: https://docs.conda.io/en/latest/miniconda.html -On Linux or WSL, it can be installed with these two commands: +On Linux or WSL, it can be automatically installed with these two commands: ``` curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh" From 8c8286b0e6ff291dc431ef92e12a794a303071f3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 20:49:40 -0300 Subject: [PATCH 69/89] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 26e5bfcc..e3f8485f 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,10 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * [DeepSpeed ZeRO-3 offload](https://github.com/oobabooga/text-generation-webui/wiki/DeepSpeed). * Get responses via API, [with](https://github.com/oobabooga/text-generation-webui/blob/main/api-example-streaming.py) or [without](https://github.com/oobabooga/text-generation-webui/blob/main/api-example.py) streaming. * [LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model). -* [LoRAs](https://github.com/oobabooga/text-generation-webui/wiki/Using-LoRAs). * [RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model). -* Softprompts. -* [Extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). +* [Supports LoRAs](https://github.com/oobabooga/text-generation-webui/wiki/Using-LoRAs). +* Supports softprompts. +* [Supports extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions). * [Works on Google Colab](https://github.com/oobabooga/text-generation-webui/wiki/Running-on-Colab). ## Installation From f2a5ca7d49f59646811d9363a3e068b929405c99 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 20:50:27 -0300 Subject: [PATCH 70/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e3f8485f..d3058d7f 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. * Generate Markdown output for [GALACTICA](https://github.com/paperswithcode/galai), including LaTeX support. * Support for [Pygmalion](https://huggingface.co/models?search=pygmalionai/pygmalion) and custom characters in JSON or TavernAI Character Card formats ([FAQ](https://github.com/oobabooga/text-generation-webui/wiki/Pygmalion-chat-model-FAQ)). * Advanced chat features (send images, get audio responses with TTS). -* Stream the text output in real time. +* Stream the text output in real time very efficiently. * Load parameter presets from text files. * Load large models in 8-bit mode. * Split large models across your GPU(s), CPU, and disk. 
From 7d97da1dcb21962a80fed4abe3d9dd3dc0171bda Mon Sep 17 00:00:00 2001 From: Wojtek Kowaluk Date: Sat, 18 Mar 2023 00:17:05 +0100 Subject: [PATCH 71/89] add venv paths to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 1b7f0fb8..e2017e49 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ img_me* !models/place-your-models-here.txt !softprompts/place-your-softprompts-here.txt !torch-dumps/place-your-pt-models-here.txt + +venv/ +.venv/ From 30939e2aee539bd3a46573aa1ed86168b31fddf4 Mon Sep 17 00:00:00 2001 From: Wojtek Kowaluk Date: Sat, 18 Mar 2023 00:56:23 +0100 Subject: [PATCH 72/89] add mps support on apple silicon --- modules/models.py | 9 ++++++++- modules/text_generation.py | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index 63060d43..c37b0586 100644 --- a/modules/models.py +++ b/modules/models.py @@ -46,6 +46,13 @@ def load_model(model_name): if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) + if torch.has_mps: + model = AutoModelForCausalLM.from_pretrained( + Path(f"models/{shared.model_name}"),low_cpu_mem_usage=True, + torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16 + ) + device = torch.device('mps') + model = model.to(device) else: model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16).cuda() @@ -97,7 +104,7 @@ def load_model(model_name): # Custom else: params = {"low_cpu_mem_usage": True} - if not shared.args.cpu and not torch.cuda.is_available(): + if not shared.args.cpu and not torch.cuda.is_available() and not torch.has_mps: print("Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been detected.\nFalling back to CPU mode.\n") shared.args.cpu = True diff --git a/modules/text_generation.py b/modules/text_generation.py index e5b4ad91..3a7bfa6e 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -33,9 +33,13 @@ def encode(prompt, tokens_to_generate=0, add_special_tokens=True): return input_ids.numpy() elif shared.args.deepspeed: return input_ids.to(device=local_rank) + elif torch.has_mps: + device = torch.device('mps') + return input_ids.to(device) else: return input_ids.cuda() + def decode(output_ids): # Open Assistant relies on special tokens like <|endoftext|> if re.match('(oasst|galactica)-*', shared.model_name.lower()): From dc35861184b4f658e604fb1ab45c024749884ab7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 21:05:17 -0300 Subject: [PATCH 73/89] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d3058d7f..f5b6faa7 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,7 @@ python download-model.py EleutherAI/gpt-j-6B --text-only ## Starting the web UI conda activate textgen + cd text-generation-webui python server.py Then browse to From 7994b580d5c9fd4691276f40f7a9b6a204ce2564 Mon Sep 17 00:00:00 2001 From: Wojtek Kowaluk Date: Sat, 18 Mar 2023 
02:27:26 +0100 Subject: [PATCH 74/89] clean up duplicated code --- modules/models.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/modules/models.py b/modules/models.py index c37b0586..8fa7307e 100644 --- a/modules/models.py +++ b/modules/models.py @@ -46,15 +46,17 @@ def load_model(model_name): if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) - if torch.has_mps: - model = AutoModelForCausalLM.from_pretrained( - Path(f"models/{shared.model_name}"),low_cpu_mem_usage=True, - torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16 - ) - device = torch.device('mps') - model = model.to(device) else: - model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16).cuda() + model = AutoModelForCausalLM.from_pretrained( + Path(f"models/{shared.model_name}"), + low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16 + ) + if torch.has_mps: + device = torch.device('mps') + model = model.to(device) + else: + model = model.cuda() + # FlexGen elif shared.args.flexgen: From e26763a51017988f3c95feafe3458e1b8ecbb4a3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 22:56:46 -0300 Subject: [PATCH 75/89] Minor changes --- .gitignore | 5 ++--- modules/models.py | 8 ++------ modules/text_generation.py | 1 - 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index e2017e49..d98b81d8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ torch-dumps/* *pycache* */*pycache* */*/pycache* +venv/ +.venv/ settings.json img_bot* @@ -19,6 +21,3 @@ img_me* !models/place-your-models-here.txt !softprompts/place-your-softprompts-here.txt !torch-dumps/place-your-pt-models-here.txt - -venv/ -.venv/ diff --git a/modules/models.py b/modules/models.py index 8fa7307e..f07e738b 100644 --- a/modules/models.py +++ b/modules/models.py @@ -47,17 +47,13 @@ def load_model(model_name): if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) else: - model = AutoModelForCausalLM.from_pretrained( - Path(f"models/{shared.model_name}"), - low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16 - ) + model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) if torch.has_mps: device = torch.device('mps') model = model.to(device) else: model = model.cuda() - # FlexGen elif shared.args.flexgen: # Initialize environment @@ -106,7 +102,7 @@ def load_model(model_name): # Custom else: params = {"low_cpu_mem_usage": True} - if not shared.args.cpu and not torch.cuda.is_available() and not torch.has_mps: + if not any((shared.args.cpu, torch.cuda.is_available(), torch.has_mps)): print("Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been 
detected.\nFalling back to CPU mode.\n") shared.args.cpu = True diff --git a/modules/text_generation.py b/modules/text_generation.py index 3a7bfa6e..1d11de12 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -39,7 +39,6 @@ def encode(prompt, tokens_to_generate=0, add_special_tokens=True): else: return input_ids.cuda() - def decode(output_ids): # Open Assistant relies on special tokens like <|endoftext|> if re.match('(oasst|galactica)-*', shared.model_name.lower()): From a7acfa4893956da3138465b8de58fa6165ea070f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 22:57:46 -0300 Subject: [PATCH 76/89] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f5b6faa7..364231ff 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,9 @@ conda activate textgen | Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2` | | MacOS + MPS (untested) | Any | `conda install pytorch torchvision torchaudio -c pytorch` | -The up to date commands can be found here: https://pytorch.org/get-started/locally/ +The up to date commands can be found here: https://pytorch.org/get-started/locally/. + +MacOS users, refer to the comments here: https://github.com/oobabooga/text-generation-webui/pull/393 #### 3. Install the web UI From a163807f86c3be072fd28771e50c0df85830ff28 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 03:07:27 -0300 Subject: [PATCH 77/89] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 364231ff..0d16fa35 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ cd text-generation-webui pip install -r requirements.txt ``` -If you experience bitsandbytes issues on WSL while trying to use `--load-in-8bit`, see this thread: https://github.com/microsoft/WSL/issues/5548#issuecomment-1292858815 +If you experience bitsandbytes issues on WSL while trying to use `--load-in-8bit`, see this comment: https://github.com/TimDettmers/bitsandbytes/issues/156#issuecomment-1462329713 ### Alternative: native Windows installation From 86b99006d9c2aea21aa8d71413560246737e3c57 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 10:27:52 -0300 Subject: [PATCH 78/89] Remove rwkv dependency --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index fcf000a9..b3a17ea4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ markdown numpy peft==0.2.0 requests -rwkv==0.4.2 safetensors==0.3.0 sentencepiece tqdm From 7c945cfe8ecb38e41ee57c8884d582e0b453bc8a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 10:55:24 -0300 Subject: [PATCH 79/89] Don't include PeftModel every time --- modules/LoRA.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index b568e57b..f29523d2 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -1,13 +1,13 @@ from pathlib import Path -from peft import PeftModel - import modules.shared as shared from modules.models import load_model def add_lora_to_model(lora_name): + from peft import PeftModel + # Is there a more efficient way of returning to the base model? 
if lora_name == "None": print("Reloading the model to remove the LoRA...") From c753261338177a60fe326ef8ef7db9024c4f2cfa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 10:55:57 -0300 Subject: [PATCH 80/89] Disable stop_at_newline by default --- modules/shared.py | 2 +- settings-template.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 488a1e96..2592ace7 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -30,7 +30,7 @@ settings = { 'name1': 'Person 1', 'name2': 'Person 2', 'context': 'This is a conversation between two people.', - 'stop_at_newline': True, + 'stop_at_newline': False, 'chat_prompt_size': 2048, 'chat_prompt_size_min': 0, 'chat_prompt_size_max': 2048, diff --git a/settings-template.json b/settings-template.json index df7403d6..7a7de7af 100644 --- a/settings-template.json +++ b/settings-template.json @@ -5,7 +5,7 @@ "name1": "Person 1", "name2": "Person 2", "context": "This is a conversation between two people.", - "stop_at_newline": true, + "stop_at_newline": false, "chat_prompt_size": 2048, "chat_prompt_size_min": 0, "chat_prompt_size_max": 2048, From a0b1a30fd563cc80fe653f6ba588ea19bde40fe7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 11:23:56 -0300 Subject: [PATCH 81/89] Specify torchvision/torchaudio versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d16fa35..407d8e46 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia` | +| Linux/WSL | NVIDIA | `conda install torchvision=0.14.1 torchaudio=0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia` | | Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2` | | MacOS + MPS (untested) | Any | `conda install pytorch torchvision torchaudio -c pytorch` | From 705f513c4c1209651dac9242608399451ec6c06b Mon Sep 17 00:00:00 2001 From: ThisIsPIRI Date: Sat, 18 Mar 2023 23:33:24 +0900 Subject: [PATCH 82/89] Add loras to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d98b81d8..702bb1eb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ characters/* extensions/silero_tts/outputs/* extensions/elevenlabs_tts/outputs/* logs/* +loras/* models/* softprompts/* torch-dumps/* @@ -18,6 +19,7 @@ img_me* !characters/Example.json !characters/Example.png +!loras/place-your-loras-here.txt !models/place-your-models-here.txt !softprompts/place-your-softprompts-here.txt !torch-dumps/place-your-pt-models-here.txt From d2a7fac8ea673ec29c82e41f68b50ac63a6c3d2e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 11:56:04 -0300 Subject: [PATCH 83/89] Use pip instead of conda for pytorch --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 407d8e46..078671af 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,9 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `conda install torchvision=0.14.1 torchaudio=0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia` | +| Linux/WSL | NVIDIA | `pip3 install torch torchvision torchaudio` 
| | Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2` | -| MacOS + MPS (untested) | Any | `conda install pytorch torchvision torchaudio -c pytorch` | +| MacOS + MPS (untested) | Any | `pip3 install torch torchvision torchaudio` | The up to date commands can be found here: https://pytorch.org/get-started/locally/. From 0cbe2dd7e9614746073cb56233b40e2cc24031ca Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 18 Mar 2023 12:24:54 -0300 Subject: [PATCH 84/89] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 078671af..ded9b351 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ The up to date commands can be found here: https://pytorch.org/get-started/local MacOS users, refer to the comments here: https://github.com/oobabooga/text-generation-webui/pull/393 + #### 3. Install the web UI ``` @@ -79,7 +80,9 @@ cd text-generation-webui pip install -r requirements.txt ``` -If you experience bitsandbytes issues on WSL while trying to use `--load-in-8bit`, see this comment: https://github.com/TimDettmers/bitsandbytes/issues/156#issuecomment-1462329713 +> **Note** +> +> For bitsandbytes and `--load-in-8bit` to work on Linux/WSL, this dirty fix is currently necessary: https://github.com/oobabooga/text-generation-webui/issues/400#issuecomment-1474876859 ### Alternative: native Windows installation From c79fc69e95fbe2aac85ccd414b92b7b3a425bd81 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 19 Mar 2023 10:36:57 -0300 Subject: [PATCH 85/89] Fix the API example with streaming #417 --- api-example-stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api-example-stream.py b/api-example-stream.py index add1df41..055d605b 100644 --- a/api-example-stream.py +++ b/api-example-stream.py @@ -44,14 +44,14 @@ async def run(context): case "send_hash": await websocket.send(json.dumps({ "session_hash": session, - "fn_index": 7 + "fn_index": 9 })) case "estimation": pass case "send_data": await websocket.send(json.dumps({ "session_hash": session, - "fn_index": 7, + "fn_index": 9, "data": [ context, params['max_new_tokens'], From 7073e960933e8c2aad1881d57da1b42303cca528 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 19 Mar 2023 12:05:28 -0300 Subject: [PATCH 86/89] Add back RWKV dependency #98 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index b3a17ea4..e5b3de69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ markdown numpy peft==0.2.0 requests +rwkv==0.7.0 safetensors==0.3.0 sentencepiece tqdm From a78b6508fcc0f5b597365e7ff0fa1a9f9e43d8ad Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 19 Mar 2023 12:11:35 -0300 Subject: [PATCH 87/89] Make custom LoRAs work by default #385 --- modules/LoRA.py | 2 +- modules/shared.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index f29523d2..6915e157 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -17,6 +17,6 @@ def add_lora_to_model(lora_name): print(f"Adding the LoRA {lora_name} to the model...") params = {} - #params['device_map'] = {'': 0} + params['device_map'] = {'': 0} #params['dtype'] = shared.model.dtype shared.model = PeftModel.from_pretrained(shared.model, 
Path(f"loras/{lora_name}"), **params) diff --git a/modules/shared.py b/modules/shared.py index 2592ace7..e3920f22 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,7 +56,7 @@ settings = { }, 'lora_prompts': { 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + '(alpaca-lora-7b|alpaca-lora-13b|alpaca-lora-30b)': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" } } From 257edf5f56ebe9765135509e3cf4833207c34138 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 19 Mar 2023 12:30:51 -0300 Subject: [PATCH 88/89] Make the Default preset more reasonable Credits: anonymous 4chan user who got it off "some twitter post or something someone linked, who even knows anymore" --- presets/Default.txt | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/presets/Default.txt b/presets/Default.txt index 9f0983ec..d5283836 100644 --- a/presets/Default.txt +++ b/presets/Default.txt @@ -1,12 +1,7 @@ do_sample=True -temperature=1 -top_p=1 -typical_p=1 -repetition_penalty=1 -top_k=50 -num_beams=1 -penalty_alpha=0 -min_length=0 -length_penalty=1 -no_repeat_ngram_size=0 +top_p=0.5 +top_k=40 +temperature=0.7 +repetition_penalty=1.2 +typical_p=1.0 early_stopping=False From 4d701a6eb902919f35da40240d74a079d7a53df6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 19 Mar 2023 12:51:47 -0300 Subject: [PATCH 89/89] Create a mirror for the preset menu --- presets/Individual Today.txt | 6 ------ server.py | 9 +++++++-- 2 files changed, 7 insertions(+), 8 deletions(-) delete mode 100644 presets/Individual Today.txt diff --git a/presets/Individual Today.txt b/presets/Individual Today.txt deleted file mode 100644 index f40b879c..00000000 --- a/presets/Individual Today.txt +++ /dev/null @@ -1,6 +0,0 @@ -do_sample=True -top_p=0.9 -top_k=50 -temperature=1.39 -repetition_penalty=1.08 -typical_p=0.2 diff --git a/server.py b/server.py index 1d324fba..060f09d5 100644 --- a/server.py +++ b/server.py @@ -102,7 +102,7 @@ def load_preset_values(preset_menu, return_dict=False): if return_dict: return generate_params else: - return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] + return preset_menu, generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], 
generate_params['early_stopping'] def upload_soft_prompt(file): with zipfile.ZipFile(io.BytesIO(file)) as zf: @@ -130,6 +130,10 @@ def create_model_and_preset_menus(): def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) + with gr.Row(): + shared.gradio['preset_menu_mirror'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') + ui.create_refresh_button(shared.gradio['preset_menu_mirror'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') + with gr.Row(): with gr.Column(): with gr.Box(): @@ -174,7 +178,8 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio['do_sample'], shared.gradio['temperature'], shared.gradio['top_p'], shared.gradio['typical_p'], shared.gradio['repetition_penalty'], shared.gradio['encoder_repetition_penalty'], shared.gradio['top_k'], shared.gradio['min_length'], shared.gradio['no_repeat_ngram_size'], shared.gradio['num_beams'], shared.gradio['penalty_alpha'], shared.gradio['length_penalty'], shared.gradio['early_stopping']]) + shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) + shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']])
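
The wiring in PATCH 89/89 works because `load_preset_values()` now returns the name of the selected preset as its first output: whichever menu fires `change()`, the other dropdown receives the same preset name while the generation sliders receive the loaded parameters. Below is a minimal, self-contained sketch of that mirrored-dropdown pattern. It is illustrative only: the `PRESETS` dict, `load_preset()`, and the component names are invented for this example and are not part of the webui code.

```python
import gradio as gr

# Hypothetical presets standing in for the files under presets/ (illustration only).
PRESETS = {
    'Default': 'do_sample=True\ntop_p=0.5\ntop_k=40\ntemperature=0.7\nrepetition_penalty=1.2',
    'Naive': 'do_sample=True\ntemperature=0.7',
}

def load_preset(name):
    # Return the chosen name first so the *other* dropdown can be updated to
    # match it, mirroring load_preset_values() returning preset_menu.
    return name, PRESETS[name]

with gr.Blocks() as demo:
    menu = gr.Dropdown(choices=list(PRESETS), value='Default', label='Generation parameters preset')
    menu_mirror = gr.Dropdown(choices=list(PRESETS), value='Default', label='Generation parameters preset')
    params = gr.Textbox(label='Loaded parameters', value=PRESETS['Default'])

    # Each menu writes to its mirror, so selecting a preset in either place
    # updates both dropdowns and the parameter display.
    menu.change(load_preset, [menu], [menu_mirror, params])
    menu_mirror.change(load_preset, [menu_mirror], [menu, params])

demo.launch()
```

In the patch itself, the original `preset_menu` is created in `create_model_and_preset_menus()` while the mirror is added in `create_settings_menus()`, so the same preset selector is reachable from both parts of the UI without the two copies drifting apart.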