From 24e86bb21b7b8e919c1d2ca665b32ffd7511ecc8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 3 Mar 2024 12:14:48 -0800 Subject: [PATCH 01/25] Bump llama-cpp-python to 0.2.55 --- requirements.txt | 24 ++++++++++++------------ requirements_amd.txt | 12 ++++++------ requirements_amd_noavx2.txt | 8 ++++---- requirements_apple_intel.txt | 12 ++++++------ requirements_apple_silicon.txt | 16 ++++++++-------- requirements_cpu_only.txt | 8 ++++---- requirements_cpu_only_noavx2.txt | 8 ++++---- requirements_noavx2.txt | 24 ++++++++++++------------ 8 files changed, 56 insertions(+), 56 deletions(-) diff --git a/requirements.txt b/requirements.txt index abbd1a62..9051386a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,22 +28,22 @@ bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 56c1bb03..3ddf54e5 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -24,14 +24,14 @@ 
tqdm wandb # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.52+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.52+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.55+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.55+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 58b3fd1a..7017734f 
100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -24,10 +24,10 @@ tqdm wandb # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index cd9eaacb..1cdf8f52 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -24,10 +24,10 @@ tqdm wandb # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index c0faebc0..94334156 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -24,12 +24,12 @@ tqdm wandb # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.52-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 19286bf1..32a0ff3e 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -24,7 +24,7 @@ tqdm wandb # llama-cpp-python (CPU only, AVX2) 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index a71e4a7c..fadc3e81 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -24,7 +24,7 @@ tqdm wandb # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-win_amd64.whl; platform_system 
== "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 8244aa5f..a5b9bc10 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -28,22 +28,22 @@ bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.52+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, no tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.52+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, tensor cores) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.52+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" From 70047a5c57d2392bf46b6148e34866f37526c36e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 3 Mar 2024 13:19:27 -0800 Subject: [PATCH 02/25] Bump bitsandbytes to 0.42.0 on Windows --- requirements.txt | 2 +- requirements_noavx2.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt
b/requirements.txt index 9051386a..7e7c2a3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,7 +25,7 @@ wandb # bitsandbytes bitsandbytes==0.42.*; platform_system != "Windows" -https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.42.0-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index a5b9bc10..40aa3c81 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -25,7 +25,7 @@ wandb # bitsandbytes bitsandbytes==0.42.*; platform_system != "Windows" -https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.42.0-py3-none-win_amd64.whl; platform_system == "Windows" # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 8bd4960d055b717416d5e0f1bef51767a16065ad Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 3 Mar 2024 19:40:32 -0300 Subject: [PATCH 03/25] Update PyTorch to 2.2 (also update flash-attn to 2.5.6) (#5618) --- README.md | 14 ++--- one_click.py | 100 ++++++++++++++++++++++++++---------- requirements.txt | 18 +++---- requirements_amd.txt | 6 +-- requirements_amd_noavx2.txt | 6 +-- requirements_noavx2.txt | 18 +++---- 6 files changed, 103 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 686acac1..ca97302d 100644 --- a/README.md +++ b/README.md @@ -75,12 +75,12 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu121` | -| Linux/WSL | CPU only | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cpu` | -| Linux | AMD | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/rocm5.6` | -| MacOS + MPS | Any | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.*` | -| Windows | NVIDIA | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu121` | -| Windows | CPU only | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.*` | +| Linux/WSL | NVIDIA | `pip3 install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121` | +| Linux/WSL | CPU only | `pip3 install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu` | +| Linux | AMD | `pip3 install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/rocm5.6` | +| MacOS + MPS | Any | `pip3 install torch==2.2.1 
torchvision==0.17.1 torchaudio==2.2.1` | +| Windows | NVIDIA | `pip3 install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121` | +| Windows | CPU only | `pip3 install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1` | The up-to-date commands can be found here: https://pytorch.org/get-started/locally/. @@ -145,7 +145,7 @@ Then browse to 1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12: ``` -pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu118 +pip3 install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118 conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime ``` diff --git a/one_click.py b/one_click.py index 105d6519..4aa5595c 100644 --- a/one_click.py +++ b/one_click.py @@ -9,14 +9,21 @@ import site import subprocess import sys -script_dir = os.getcwd() -conda_env_path = os.path.join(script_dir, "installer_files", "env") - # Remove the '# ' from the following lines as needed for your AMD GPU on Linux # os.environ["ROCM_PATH"] = '/opt/rocm' # os.environ["HSA_OVERRIDE_GFX_VERSION"] = '10.3.0' # os.environ["HCC_AMDGPU_TARGET"] = 'gfx1030' + +# Define the required PyTorch version +TORCH_VERSION = "2.2.1" +TORCHVISION_VERSION = "0.17.1" +TORCHAUDIO_VERSION = "2.2.1" + +# Environment +script_dir = os.getcwd() +conda_env_path = os.path.join(script_dir, "installer_files", "env") + # Command-line flags cmd_flags_path = os.path.join(script_dir, "CMD_FLAGS.txt") if os.path.exists(cmd_flags_path): @@ -86,13 +93,42 @@ def torch_version(): if site_packages_path: torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines() - torver = [line for line in torch_version_file if '__version__' in line][0].split('__version__ = ')[1].strip("'") + torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'") else: from torch import __version__ as torver return torver +def update_pytorch(): + print_big_message("Checking for PyTorch updates") + + torver = torch_version() + is_cuda = '+cu' in torver + is_cuda118 = '+cu118' in torver # 2.1.0+cu118 + is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2 + is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi + is_cpu = '+cpu' in torver # 2.0.1+cpu + + install_pytorch = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " + + if is_cuda118: + install_pytorch += "--index-url https://download.pytorch.org/whl/cu118" + elif is_cuda: + install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" + elif is_rocm: + install_pytorch += "--index-url https://download.pytorch.org/whl/rocm5.6" + elif is_cpu: + install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" + elif is_intel: + if is_linux(): + install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + + run_cmd(f"{install_pytorch}", assert_success=True, environment=True) + + def is_installed(): site_packages_path = None for sitedir in 
site.getsitepackages(): @@ -166,7 +202,8 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, def install_webui(): - # Select your GPU, or choose to run in CPU mode + + # Ask the user for the GPU vendor if "GPU_CHOICE" in os.environ: choice = os.environ["GPU_CHOICE"].upper() print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") @@ -195,23 +232,20 @@ def install_webui(): } selected_gpu = gpu_choice_to_name[choice] + use_cuda118 = "N" + # Write a flag to CMD_FLAGS.txt for CPU mode if selected_gpu == "NONE": with open(cmd_flags_path, 'r+') as cmd_flags_file: if "--cpu" not in cmd_flags_file.read(): print_big_message("Adding the --cpu flag to CMD_FLAGS.txt.") cmd_flags_file.write("\n--cpu") - # Find the proper Pytorch installation command - install_git = "conda install -y -k ninja git" - install_pytorch = "python -m pip install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* " - - use_cuda118 = "N" - if any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": + # Check if the user wants CUDA 11.8 + elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": if "USE_CUDA118" in os.environ: use_cuda118 = "Y" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "N" else: - # Ask for CUDA version if using NVIDIA print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is very old (Kepler or older).\nFor RTX and GTX series GPUs, say \"N\". If unsure, say \"N\".\n") use_cuda118 = input("Input (Y/N)> ").upper().strip('"\'').strip() while use_cuda118 not in 'YN': @@ -220,29 +254,35 @@ def install_webui(): if use_cuda118 == 'Y': print("CUDA: 11.8") - install_pytorch += "--index-url https://download.pytorch.org/whl/cu118" else: print("CUDA: 12.1") - install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" - elif not is_macos() and selected_gpu == "AMD": - if is_linux(): - install_pytorch += "--index-url https://download.pytorch.org/whl/rocm5.6" + + # No PyTorch for AMD on Windows (?) + elif is_windows() and selected_gpu == "AMD": + print("PyTorch setup on Windows is not implemented yet. Exiting...") + sys.exit(1) + + # Find the Pytorch installation command + install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " + + if selected_gpu == "NVIDIA": + if use_cuda118 == 'Y': + install_pytorch += "--index-url https://download.pytorch.org/whl/cu118" else: - print("AMD GPUs are only supported on Linux. 
Exiting...") - sys.exit(1) - elif is_linux() and selected_gpu in ["APPLE", "NONE"]: + install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" + elif selected_gpu == "AMD": + install_pytorch += "--index-url https://download.pytorch.org/whl/rocm5.6" + elif selected_gpu in ["APPLE", "NONE"]: install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" elif selected_gpu == "INTEL": - install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + if is_linux(): + install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" # Install Git and then Pytorch print_big_message("Installing PyTorch.") - run_cmd(f"{install_git} && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) - - # Install CUDA libraries (this wasn't necessary for Pytorch before...) - if selected_gpu == "NVIDIA": - print_big_message("Installing the CUDA runtime libraries.") - run_cmd(f"conda install -y -c \"nvidia/label/{'cuda-12.1.1' if use_cuda118 == 'N' else 'cuda-11.8.0'}\" cuda-runtime", assert_success=True, environment=True) + run_cmd(f"conda install -y -k ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) if selected_gpu == "INTEL": # Install oneAPI dependencies via conda @@ -295,7 +335,11 @@ def update_requirements(initial_installation=False): elif initial_installation: print_big_message("Will not install extensions due to INSTALL_EXTENSIONS environment variable.") - # Detect the Python and PyTorch versions + # Update PyTorch + if not initial_installation: + update_pytorch() + + # Detect the PyTorch version torver = torch_version() is_cuda = '+cu' in torver is_cuda118 = '+cu118' in torver # 2.1.0+cu118 diff --git a/requirements.txt b/requirements.txt index 7e7c2a3f..4a608081 100644 --- a/requirements.txt +++ b/requirements.txt @@ -50,15 +50,15 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp311-cp311-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" 
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 3ddf54e5..dfb132de 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -34,8 +34,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/ro https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.55+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 7017734f..d04c8158 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,8 +32,8 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp311-cp311-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 40aa3c81..9077bde3 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -50,15 +50,15 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
-https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/exllamav2/releases/download/v0.0.14.1/exllamav2-0.0.14.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2.0cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From fa4ce0eee82c42a25629fdf1ff56863b72c5a912 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 3 Mar 2024 17:42:59 -0800 Subject: [PATCH 04/25] One-click installer: minor change to CMD_FLAGS.txt in CPU mode --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 4aa5595c..92663df4 100644 --- a/one_click.py +++ b/one_click.py @@ -239,7 +239,7 @@ def install_webui(): with 
open(cmd_flags_path, 'r+') as cmd_flags_file: if "--cpu" not in cmd_flags_file.read(): print_big_message("Adding the --cpu flag to CMD_FLAGS.txt.") - cmd_flags_file.write("\n--cpu") + cmd_flags_file.write("\n--cpu\n") # Check if the user wants CUDA 11.8 elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": From 527ba981056c5e94c153ae9bf50081410fdc6059 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 04:46:39 -0300 Subject: [PATCH 05/25] Do not install extensions requirements by default (#5621) --- Colab-TextGen-GPU.ipynb | 1 - README.md | 3 +- docker/amd/Dockerfile | 2 +- docker/cpu/Dockerfile | 2 +- docker/intel/Dockerfile | 2 +- docker/nvidia/Dockerfile | 2 +- docs/12 - OpenAI API.md | 6 ---- extensions/openai/requirements.txt | 4 --- extensions_reqs_linux.sh | 26 +++++++++++++++++ extensions_reqs_macos.sh | 26 +++++++++++++++++ extensions_reqs_windows.bat | 37 ++++++++++++++++++++++++ extensions_reqs_wsl.sh | 11 +++++++ one_click.py | 46 ++++++++++++------------------ requirements.txt | 6 ++++ requirements_amd.txt | 6 ++++ requirements_amd_noavx2.txt | 6 ++++ requirements_apple_intel.txt | 6 ++++ requirements_apple_silicon.txt | 6 ++++ requirements_cpu_only.txt | 6 ++++ requirements_cpu_only_noavx2.txt | 6 ++++ requirements_noavx2.txt | 6 ++++ requirements_nowheels.txt | 6 ++++ wsl.sh | 1 + 23 files changed, 180 insertions(+), 43 deletions(-) delete mode 100644 extensions/openai/requirements.txt create mode 100755 extensions_reqs_linux.sh create mode 100755 extensions_reqs_macos.sh create mode 100755 extensions_reqs_windows.bat create mode 100755 extensions_reqs_wsl.sh diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index bd46992f..53f60986 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -72,7 +72,6 @@ " with open('temp_requirements.txt', 'w') as file:\n", " file.write('\\n'.join(textgen_requirements))\n", "\n", - " !pip install -r extensions/openai/requirements.txt --upgrade\n", " !pip install -r temp_requirements.txt --upgrade\n", "\n", " print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n", diff --git a/README.md b/README.md index ca97302d..09216397 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,9 @@ The script uses Miniconda to set up a Conda environment in the `installer_files` If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, `cmd_macos.sh`, or `cmd_wsl.bat`. * There is no need to run any of those scripts (`start_`, `update_`, or `cmd_`) as admin/root. +* To install the requirements for extensions, you can use the `extensions_reqs` script for your OS. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts. * For additional instructions about AMD and WSL setup, consult [the documentation](https://github.com/oobabooga/text-generation-webui/wiki). -* For automated installation, you can use the `GPU_CHOICE`, `USE_CUDA118`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=FALSE ./start_linux.sh`. +* For automated installation, you can use the `GPU_CHOICE`, `USE_CUDA118`, and `LAUNCH_AFTER_INSTALL` environment variables. 
For instance: `GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE ./start_linux.sh`. ### Manual installation using Conda diff --git a/docker/amd/Dockerfile b/docker/amd/Dockerfile index 365e88e3..000303d4 100644 --- a/docker/amd/Dockerfile +++ b/docker/amd/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=B USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=B USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} WORKDIR /home/app/text-generation-webui diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index 04ccf94a..472fc652 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -17,7 +17,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=N USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=N USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} # set umask to ensure group read / write at runtime diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile index bc67a185..53fdf25b 100644 --- a/docker/intel/Dockerfile +++ b/docker/intel/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=D USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=D USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} # set umask to ensure group read / write at runtime diff --git a/docker/nvidia/Dockerfile b/docker/nvidia/Dockerfile index ca17c17b..1414bd38 100644 --- a/docker/nvidia/Dockerfile +++ b/docker/nvidia/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} WORKDIR /home/app/text-generation-webui diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index eb6bd468..fee57d33 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -7,12 +7,6 @@ The main API for this project is meant to be a drop-in replacement to the OpenAI * It doesn't connect to OpenAI. * It doesn't use the openai-python library. 
-If you did not use the one-click installers, you may need to install the requirements first: - -``` -pip install -r extensions/openai/requirements.txt -``` - ### Starting the API Add `--api` to your command-line flags. diff --git a/extensions/openai/requirements.txt b/extensions/openai/requirements.txt deleted file mode 100644 index e5d6d797..00000000 --- a/extensions/openai/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -SpeechRecognition==3.10.0 -flask_cloudflared==0.0.14 -sse-starlette==1.6.5 -tiktoken diff --git a/extensions_reqs_linux.sh b/extensions_reqs_linux.sh new file mode 100755 index 00000000..ea0c3773 --- /dev/null +++ b/extensions_reqs_linux.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +cd "$(dirname "${BASH_SOURCE[0]}")" + +if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi + +# deactivate existing conda envs as needed to avoid conflicts +{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null + +# config +CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" +INSTALL_ENV_DIR="$(pwd)/installer_files/env" + +# environment isolation +export PYTHONNOUSERSITE=1 +unset PYTHONPATH +unset PYTHONHOME +export CUDA_PATH="$INSTALL_ENV_DIR" +export CUDA_HOME="$CUDA_PATH" + +# activate installer env +source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script) +conda activate "$INSTALL_ENV_DIR" + +# update installer env +python one_click.py --install-extensions && echo -e "\nDone!" diff --git a/extensions_reqs_macos.sh b/extensions_reqs_macos.sh new file mode 100755 index 00000000..ea0c3773 --- /dev/null +++ b/extensions_reqs_macos.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +cd "$(dirname "${BASH_SOURCE[0]}")" + +if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi + +# deactivate existing conda envs as needed to avoid conflicts +{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null + +# config +CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" +INSTALL_ENV_DIR="$(pwd)/installer_files/env" + +# environment isolation +export PYTHONNOUSERSITE=1 +unset PYTHONPATH +unset PYTHONHOME +export CUDA_PATH="$INSTALL_ENV_DIR" +export CUDA_HOME="$CUDA_PATH" + +# activate installer env +source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script) +conda activate "$INSTALL_ENV_DIR" + +# update installer env +python one_click.py --install-extensions && echo -e "\nDone!" diff --git a/extensions_reqs_windows.bat b/extensions_reqs_windows.bat new file mode 100755 index 00000000..a3fb1f65 --- /dev/null +++ b/extensions_reqs_windows.bat @@ -0,0 +1,37 @@ +@echo off + +cd /D "%~dp0" + +set PATH=%PATH%;%SystemRoot%\system32 + +echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. 
&& goto end + +@rem fix failed install when installing to a separate drive +set TMP=%cd%\installer_files +set TEMP=%cd%\installer_files + +@rem deactivate existing conda envs as needed to avoid conflicts +(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul + +@rem config +set CONDA_ROOT_PREFIX=%cd%\installer_files\conda +set INSTALL_ENV_DIR=%cd%\installer_files\env + +@rem environment isolation +set PYTHONNOUSERSITE=1 +set PYTHONPATH= +set PYTHONHOME= +set "CUDA_PATH=%INSTALL_ENV_DIR%" +set "CUDA_HOME=%CUDA_PATH%" + +@rem activate installer env +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end ) + +@rem update installer env +call python one_click.py --install-extensions && ( + echo. + echo Done! +) + +:end +pause diff --git a/extensions_reqs_wsl.sh b/extensions_reqs_wsl.sh new file mode 100755 index 00000000..450c8abb --- /dev/null +++ b/extensions_reqs_wsl.sh @@ -0,0 +1,11 @@ +@echo off + +cd /D "%~dp0" + +set PATH=%PATH%;%SystemRoot%\system32 + +@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script calling wsl.sh with 'update' will run updater +call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh install-extensions" + +:end +pause diff --git a/one_click.py b/one_click.py index 92663df4..106e86d9 100644 --- a/one_click.py +++ b/one_click.py @@ -246,7 +246,7 @@ def install_webui(): if "USE_CUDA118" in os.environ: use_cuda118 = "Y" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "N" else: - print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is very old (Kepler or older).\nFor RTX and GTX series GPUs, say \"N\". If unsure, say \"N\".\n") + print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is very old (Kepler or older).\n\nFor RTX and GTX series GPUs, say \"N\".\nIf unsure, say \"N\".\n") use_cuda118 = input("Input (Y/N)> ").upper().strip('"\'').strip() while use_cuda118 not in 'YN': print("Invalid choice. Please try again.") @@ -295,6 +295,15 @@ def install_webui(): update_requirements(initial_installation=True) +def install_extensions_requirements(): + print_big_message("Installing extensions requirements.\nSome of these may fail on Windows. Don\'t worry if you see error messages. They will not affect the main program.") + extensions = [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))] + for i, extension in enumerate(extensions): + print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n") + extension_req_path = os.path.join("extensions", extension, "requirements.txt") + run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) + + def update_requirements(initial_installation=False): # Create .git directory if missing if not os.path.exists(os.path.join(script_dir, ".git")): @@ -317,24 +326,6 @@ def update_requirements(initial_installation=False): print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.") exit(1) - # Extensions requirements are installed only during the initial install by default. - # That can be changed with the INSTALL_EXTENSIONS environment variable. 
- install = initial_installation - if "INSTALL_EXTENSIONS" in os.environ: - install = os.environ["INSTALL_EXTENSIONS"].lower() in ("yes", "y", "true", "1", "t", "on") - - if install: - print_big_message("Installing extensions requirements.") - skip = ['superbooga', 'superboogav2', 'coqui_tts'] # Fail to install on Windows - extensions = [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))] - extensions = [x for x in extensions if x not in skip] - for i, extension in enumerate(extensions): - print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n") - extension_req_path = os.path.join("extensions", extension, "requirements.txt") - run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) - elif initial_installation: - print_big_message("Will not install extensions due to INSTALL_EXTENSIONS environment variable.") - # Update PyTorch if not initial_installation: update_pytorch() @@ -379,11 +370,6 @@ def update_requirements(initial_installation=False): run_cmd(f"python -m pip uninstall -y {package_name}", environment=True) print(f"Uninstalled {package_name}") - # Make sure that API requirements are installed (temporary) - extension_req_path = os.path.join("extensions", "openai", "requirements.txt") - if os.path.exists(extension_req_path): - run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", environment=True) - # Install/update the project requirements run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) os.remove('temp_requirements.txt') @@ -409,18 +395,24 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(add_help=False) parser.add_argument('--update', action='store_true', help='Update the web UI.') + parser.add_argument('--install-extensions', action='store_true', help='Install extensions requirements.') args, _ = parser.parse_known_args() if args.update: update_requirements() + elif args.install_extensions: + install_extensions_requirements() + update_requirements() else: - # If webui has already been installed, skip and run if not is_installed(): install_webui() os.chdir(script_dir) + script_name = "extensions_reqs_windows.bat" if is_windows() else ("extensions_reqs_linux.sh" if is_linux() else "extensions_reqs_macos.sh") + print_big_message(f"The installation is finished.\n\nIf you wish to install or update extensions requirements, you can\nrun the following script at any time: {script_name}.") + if os.environ.get("LAUNCH_AFTER_INSTALL", "").lower() in ("no", "n", "false", "0", "f", "off"): - print_big_message("Install finished successfully and will now exit due to LAUNCH_AFTER_INSTALL.") + print_big_message("Will now exit due to LAUNCH_AFTER_INSTALL.") sys.exit() # Check if a model has been downloaded yet @@ -432,7 +424,7 @@ if __name__ == "__main__": model_dir = 'models' if len([item for item in glob.glob(f'{model_dir}/*') if not item.endswith(('.txt', '.yaml'))]) == 0: - print_big_message("WARNING: You haven't downloaded any model yet.\nOnce the web UI launches, head over to the \"Model\" tab and download one.") + print_big_message("You haven't downloaded any model yet.\nOnce the web UI launches, head over to the \"Model\" tab and download one.") # Workaround for llama-cpp-python loading paths in CUDA env vars even if they do not exist conda_path_bin = os.path.join(conda_env_path, "bin") diff --git a/requirements.txt b/requirements.txt index 4a608081..037fabf4 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # bitsandbytes bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/oobabooga/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.42.0-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements_amd.txt b/requirements_amd.txt index dfb132de..2bece4fd 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index d04c8158..c83b4ece 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 1cdf8f52..c1513b13 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 94334156..4515f7b6 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11" 
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10" diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 32a0ff3e..6bbbba51 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index fadc3e81..91a5db7a 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 9077bde3..6a840f50 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -23,6 +23,12 @@ transformers==4.38.* tqdm wandb +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + # bitsandbytes bitsandbytes==0.42.*; platform_system != "Windows" https://github.com/oobabooga/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.42.0-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 4dc697a6..0319b6c3 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -22,3 +22,9 @@ tensorboard transformers==4.38.* tqdm wandb + +# API +SpeechRecognition==3.10.0 +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken diff --git a/wsl.sh b/wsl.sh index 153ae85d..c94f3e8c 100755 --- a/wsl.sh +++ b/wsl.sh @@ -108,5 +108,6 @@ fi # setup installer env update env if called with 'wsl.sh update' case "$1" in ("update") python one_click.py --update;; +("install-extensions") python one_click.py --install-extensions;; (*) python one_click.py $@;; esac From dc2dd5b9d897beb8553a9ab874122ed5675247c0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:00:39 -0800 Subject: [PATCH 06/25] One-click installer: add an info message before git pull --- one_click.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/one_click.py b/one_click.py index 106e86d9..8af9482d 100644 --- a/one_click.py +++ b/one_click.py @@ -316,6 
+316,8 @@ def update_requirements(initial_installation=False): 'one_click.py' ] + print_big_message("Updating the local copy of the repository with \"git pull\"") + before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} run_cmd("git pull --autostash", assert_success=True, environment=True) after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} From 74564fe8d0da13b2daba3de310c06e1888d0812d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:11:03 -0800 Subject: [PATCH 07/25] One-click installer: delete the Miniconda installer after completion --- start_linux.sh | 3 +++ start_macos.sh | 3 +++ start_windows.bat | 3 +++ wsl.sh | 3 +++ 4 files changed, 12 insertions(+) diff --git a/start_linux.sh b/start_linux.sh index 85638a5e..5620c831 100755 --- a/start_linux.sh +++ b/start_linux.sh @@ -39,6 +39,9 @@ if [ "$conda_exists" == "F" ]; then # test the conda binary echo "Miniconda version:" "$CONDA_ROOT_PREFIX/bin/conda" --version + + # delete the Miniconda installer + rm "$INSTALL_DIR/miniconda_installer.sh" fi # create the installer env diff --git a/start_macos.sh b/start_macos.sh index b30bc4c6..6761f531 100755 --- a/start_macos.sh +++ b/start_macos.sh @@ -39,6 +39,9 @@ if [ "$conda_exists" == "F" ]; then # test the conda binary echo "Miniconda version:" "$CONDA_ROOT_PREFIX/bin/conda" --version + + # delete the Miniconda installer + rm "$INSTALL_DIR/miniconda_installer.sh" fi # create the installer env diff --git a/start_windows.bat b/start_windows.bat index 7c437b60..f607e518 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -45,6 +45,9 @@ if "%conda_exists%" == "F" ( @rem test the conda binary echo Miniconda version: call "%CONDA_ROOT_PREFIX%\_conda.exe" --version || ( echo. && echo Miniconda not found. && goto end ) + + @rem delete the Miniconda installer + del "%INSTALL_DIR%\miniconda_installer.exe" ) @rem create the installer env diff --git a/wsl.sh b/wsl.sh index c94f3e8c..0b744455 100755 --- a/wsl.sh +++ b/wsl.sh @@ -66,6 +66,9 @@ if [ "$conda_exists" == "F" ]; then # test the conda binary echo "Miniconda version:" "$CONDA_ROOT_PREFIX/bin/conda" --version + + # delete the Miniconda installer + rm "$INSTALL_DIR/miniconda_installer.sh" fi # create the installer env From 4bb79c57acf7094028c3eea195daf777fdc62162 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:11:55 -0800 Subject: [PATCH 08/25] One-click installer: change an info message --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 8af9482d..d4d09c61 100644 --- a/one_click.py +++ b/one_click.py @@ -246,7 +246,7 @@ def install_webui(): if "USE_CUDA118" in os.environ: use_cuda118 = "Y" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "N" else: - print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is very old (Kepler or older).\n\nFor RTX and GTX series GPUs, say \"N\".\nIf unsure, say \"N\".\n") + print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is\nvery old (Kepler or older).\n\nFor RTX and GTX series GPUs, say \"N\".\nIf unsure, say \"N\".\n") use_cuda118 = input("Input (Y/N)> ").upper().strip('"\'').strip() while use_cuda118 not in 'YN': print("Invalid choice. 
Please try again.") From 6adf222599efc83a7257b4dd9d3b9d6b6d8fcc0e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:20:04 -0800 Subject: [PATCH 09/25] One-click installer: change an info message --- one_click.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/one_click.py b/one_click.py index d4d09c61..67b01982 100644 --- a/one_click.py +++ b/one_click.py @@ -165,8 +165,7 @@ def print_big_message(message): lines = message.split('\n') print("\n\n*******************************************************************") for line in lines: - if line.strip() != '': - print("*", line) + print("*", line) print("*******************************************************************\n\n") From 97dc3602fcdffdc5efc69e902ce008d2cc0cdef6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 15:52:24 -0300 Subject: [PATCH 10/25] Create an update wizard (#5623) --- README.md | 4 +- extensions_reqs_linux.sh | 26 ------- extensions_reqs_macos.sh | 26 ------- extensions_reqs_windows.bat | 37 ---------- extensions_reqs_wsl.sh | 11 --- one_click.py | 72 +++++++++++++------ update_macos.sh => update_wizard_linux.sh | 2 +- update_linux.sh => update_wizard_macos.sh | 2 +- ...te_windows.bat => update_wizard_windows.sh | 2 +- update_wsl.bat => update_wizard_wsl.sh | 2 +- wsl.sh | 3 +- 11 files changed, 57 insertions(+), 130 deletions(-) delete mode 100755 extensions_reqs_linux.sh delete mode 100755 extensions_reqs_macos.sh delete mode 100755 extensions_reqs_windows.bat delete mode 100755 extensions_reqs_wsl.sh rename update_macos.sh => update_wizard_linux.sh (93%) rename update_linux.sh => update_wizard_macos.sh (93%) rename update_windows.bat => update_wizard_windows.sh (95%) rename update_wsl.bat => update_wizard_wsl.sh (93%) diff --git a/README.md b/README.md index 09216397..71a5c105 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ To restart the web UI in the future, just run the `start_` script again. This sc The script accepts command-line flags. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there. -To get updates in the future, run `update_linux.sh`, `update_windows.bat`, `update_macos.sh`, or `update_wsl.bat`. +To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`.
@@ -45,7 +45,7 @@ The script uses Miniconda to set up a Conda environment in the `installer_files` If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, `cmd_macos.sh`, or `cmd_wsl.bat`. -* There is no need to run any of those scripts (`start_`, `update_`, or `cmd_`) as admin/root. +* There is no need to run any of those scripts (`start_`, `update_wizard_`, or `cmd_`) as admin/root. * To install the requirements for extensions, you can use the `extensions_reqs` script for your OS. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts. * For additional instructions about AMD and WSL setup, consult [the documentation](https://github.com/oobabooga/text-generation-webui/wiki). * For automated installation, you can use the `GPU_CHOICE`, `USE_CUDA118`, and `LAUNCH_AFTER_INSTALL` environment variables. For instance: `GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE ./start_linux.sh`. diff --git a/extensions_reqs_linux.sh b/extensions_reqs_linux.sh deleted file mode 100755 index ea0c3773..00000000 --- a/extensions_reqs_linux.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -cd "$(dirname "${BASH_SOURCE[0]}")" - -if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi - -# deactivate existing conda envs as needed to avoid conflicts -{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null - -# config -CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" -INSTALL_ENV_DIR="$(pwd)/installer_files/env" - -# environment isolation -export PYTHONNOUSERSITE=1 -unset PYTHONPATH -unset PYTHONHOME -export CUDA_PATH="$INSTALL_ENV_DIR" -export CUDA_HOME="$CUDA_PATH" - -# activate installer env -source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script) -conda activate "$INSTALL_ENV_DIR" - -# update installer env -python one_click.py --install-extensions && echo -e "\nDone!" diff --git a/extensions_reqs_macos.sh b/extensions_reqs_macos.sh deleted file mode 100755 index ea0c3773..00000000 --- a/extensions_reqs_macos.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -cd "$(dirname "${BASH_SOURCE[0]}")" - -if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi - -# deactivate existing conda envs as needed to avoid conflicts -{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null - -# config -CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" -INSTALL_ENV_DIR="$(pwd)/installer_files/env" - -# environment isolation -export PYTHONNOUSERSITE=1 -unset PYTHONPATH -unset PYTHONHOME -export CUDA_PATH="$INSTALL_ENV_DIR" -export CUDA_HOME="$CUDA_PATH" - -# activate installer env -source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script) -conda activate "$INSTALL_ENV_DIR" - -# update installer env -python one_click.py --install-extensions && echo -e "\nDone!" 
diff --git a/extensions_reqs_windows.bat b/extensions_reqs_windows.bat deleted file mode 100755 index a3fb1f65..00000000 --- a/extensions_reqs_windows.bat +++ /dev/null @@ -1,37 +0,0 @@ -@echo off - -cd /D "%~dp0" - -set PATH=%PATH%;%SystemRoot%\system32 - -echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end - -@rem fix failed install when installing to a separate drive -set TMP=%cd%\installer_files -set TEMP=%cd%\installer_files - -@rem deactivate existing conda envs as needed to avoid conflicts -(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul - -@rem config -set CONDA_ROOT_PREFIX=%cd%\installer_files\conda -set INSTALL_ENV_DIR=%cd%\installer_files\env - -@rem environment isolation -set PYTHONNOUSERSITE=1 -set PYTHONPATH= -set PYTHONHOME= -set "CUDA_PATH=%INSTALL_ENV_DIR%" -set "CUDA_HOME=%CUDA_PATH%" - -@rem activate installer env -call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end ) - -@rem update installer env -call python one_click.py --install-extensions && ( - echo. - echo Done! -) - -:end -pause diff --git a/extensions_reqs_wsl.sh b/extensions_reqs_wsl.sh deleted file mode 100755 index 450c8abb..00000000 --- a/extensions_reqs_wsl.sh +++ /dev/null @@ -1,11 +0,0 @@ -@echo off - -cd /D "%~dp0" - -set PATH=%PATH%;%SystemRoot%\system32 - -@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script calling wsl.sh with 'update' will run updater -call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh install-extensions" - -:end -pause diff --git a/one_click.py b/one_click.py index 67b01982..58f2296a 100644 --- a/one_click.py +++ b/one_click.py @@ -32,7 +32,7 @@ if os.path.exists(cmd_flags_path): else: CMD_FLAGS = '' -flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update'])} {CMD_FLAGS}" +flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update-wizard'])} {CMD_FLAGS}" def signal_handler(sig, frame): @@ -200,6 +200,24 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, return result +def get_user_choice(question, options_dict): + print() + print(question) + print() + + for key, value in options_dict.items(): + print(f"{key}) {value}") + + print() + + choice = input("Input> ").upper() + while choice not in options_dict.keys(): + print("Invalid choice. Please try again.") + choice = input("Input> ").upper() + + return choice + + def install_webui(): # Ask the user for the GPU vendor @@ -207,20 +225,16 @@ def install_webui(): choice = os.environ["GPU_CHOICE"].upper() print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") else: - print() - print("What is your GPU?") - print() - print("A) NVIDIA") - print("B) AMD (Linux/MacOS only. Requires ROCm SDK 5.6 on Linux)") - print("C) Apple M Series") - print("D) Intel Arc (IPEX)") - print("N) None (I want to run models in CPU mode)") - print() - - choice = input("Input> ").upper() - while choice not in 'ABCDN': - print("Invalid choice. Please try again.") - choice = input("Input> ").upper() + choice = get_user_choice( + "What is your GPU?", + { + 'A': 'NVIDIA', + 'B': 'AMD (Linux/MacOS only. 
Requires ROCm SDK 5.6 on Linux)', + 'C': 'Apple M Series', + 'D': 'Intel Arc (IPEX)', + 'N': 'None (I want to run models in CPU mode)' + }, + ) gpu_choice_to_name = { "A": "NVIDIA", @@ -395,15 +409,29 @@ if __name__ == "__main__": check_env() parser = argparse.ArgumentParser(add_help=False) - parser.add_argument('--update', action='store_true', help='Update the web UI.') - parser.add_argument('--install-extensions', action='store_true', help='Install extensions requirements.') + parser.add_argument('--update-wizard', action='store_true', help='Launch a menu with update options.') args, _ = parser.parse_known_args() - if args.update: - update_requirements() - elif args.install_extensions: - install_extensions_requirements() - update_requirements() + if args.update_wizard: + choice = get_user_choice( + "What would you like to do?", + { + 'A': 'Update the web UI', + 'B': 'Install/update extensions requirements', + 'C': 'Revert local changes to repository files with \"git reset --hard\"', + 'N': 'Nothing (exit).' + }, + ) + + if choice == 'A': + update_requirements() + elif choice == 'B': + install_extensions_requirements() + update_requirements() + elif choice == 'C': + run_cmd("git reset --hard", assert_success=True, environment=True) + elif choice == 'N': + sys.exit() else: if not is_installed(): install_webui() diff --git a/update_macos.sh b/update_wizard_linux.sh similarity index 93% rename from update_macos.sh rename to update_wizard_linux.sh index 371db554..c5add61e 100755 --- a/update_macos.sh +++ b/update_wizard_linux.sh @@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a conda activate "$INSTALL_ENV_DIR" # update installer env -python one_click.py --update && echo -e "\nDone!" +python one_click.py --update-wizard && echo -e "\nDone!" diff --git a/update_linux.sh b/update_wizard_macos.sh similarity index 93% rename from update_linux.sh rename to update_wizard_macos.sh index 371db554..c5add61e 100755 --- a/update_linux.sh +++ b/update_wizard_macos.sh @@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a conda activate "$INSTALL_ENV_DIR" # update installer env -python one_click.py --update && echo -e "\nDone!" +python one_click.py --update-wizard && echo -e "\nDone!" diff --git a/update_windows.bat b/update_wizard_windows.sh similarity index 95% rename from update_windows.bat rename to update_wizard_windows.sh index 0d8f8152..2b23f322 100755 --- a/update_windows.bat +++ b/update_wizard_windows.sh @@ -28,7 +28,7 @@ set "CUDA_HOME=%CUDA_PATH%" call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end ) @rem update installer env -call python one_click.py --update && ( +call python one_click.py --update-wizard && ( echo. echo Done! 
) diff --git a/update_wsl.bat b/update_wizard_wsl.sh similarity index 93% rename from update_wsl.bat rename to update_wizard_wsl.sh index 36d019a8..35f0a349 100755 --- a/update_wsl.bat +++ b/update_wizard_wsl.sh @@ -5,7 +5,7 @@ cd /D "%~dp0" set PATH=%PATH%;%SystemRoot%\system32 @rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script calling wsl.sh with 'update' will run updater -call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh update" +call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh update-wizard" :end pause diff --git a/wsl.sh b/wsl.sh index 0b744455..7b17132f 100755 --- a/wsl.sh +++ b/wsl.sh @@ -110,7 +110,6 @@ fi # setup installer env update env if called with 'wsl.sh update' case "$1" in -("update") python one_click.py --update;; -("install-extensions") python one_click.py --install-extensions;; +("update-wizard") python one_click.py --update-wizard;; (*) python one_click.py $@;; esac From 90ab0228567e98c8abebd0464ce3597471fe342b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 10:54:16 -0800 Subject: [PATCH 11/25] Minor message change --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 58f2296a..997af5ba 100644 --- a/one_click.py +++ b/one_click.py @@ -437,7 +437,7 @@ if __name__ == "__main__": install_webui() os.chdir(script_dir) - script_name = "extensions_reqs_windows.bat" if is_windows() else ("extensions_reqs_linux.sh" if is_linux() else "extensions_reqs_macos.sh") + script_name = "update_wizard_windows.bat" if is_windows() else ("update_wizard_linux.sh" if is_linux() else "update_wizard_macos.sh") print_big_message(f"The installation is finished.\n\nIf you wish to install or update extensions requirements, you can\nrun the following script at any time: {script_name}.") if os.environ.get("LAUNCH_AFTER_INSTALL", "").lower() in ("no", "n", "false", "0", "f", "off"): From fbe83854ca02058daa1eb476526907aa3fb7da75 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:10:37 -0800 Subject: [PATCH 12/25] Minor message change --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 997af5ba..d68ebe46 100644 --- a/one_click.py +++ b/one_click.py @@ -419,7 +419,7 @@ if __name__ == "__main__": 'A': 'Update the web UI', 'B': 'Install/update extensions requirements', 'C': 'Revert local changes to repository files with \"git reset --hard\"', - 'N': 'Nothing (exit).' 
+ 'N': 'Nothing (exit)' }, ) From 2d7466073348102ef22624ba247adff1ef07340a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:35:41 -0800 Subject: [PATCH 13/25] Don't git pull on "Install/update extensions requirements" --- one_click.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/one_click.py b/one_click.py index d68ebe46..65b7acbd 100644 --- a/one_click.py +++ b/one_click.py @@ -317,29 +317,30 @@ def install_extensions_requirements(): run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) -def update_requirements(initial_installation=False): +def update_requirements(initial_installation=False, pull=True): # Create .git directory if missing if not os.path.exists(os.path.join(script_dir, ".git")): git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' run_cmd(git_creation_cmd, environment=True, assert_success=True) - files_to_check = [ - 'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat', - 'update_linux.sh', 'update_macos.sh', 'update_windows.bat', 'update_wsl.bat', - 'one_click.py' - ] + if pull: + print_big_message("Updating the local copy of the repository with \"git pull\"") - print_big_message("Updating the local copy of the repository with \"git pull\"") + files_to_check = [ + 'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat', + 'update_linux.sh', 'update_macos.sh', 'update_windows.bat', 'update_wsl.bat', + 'one_click.py' + ] - before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - run_cmd("git pull --autostash", assert_success=True, environment=True) - after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} + before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} + run_cmd("git pull --autostash", assert_success=True, environment=True) + after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - # Check for differences in installation file hashes - for file_name in files_to_check: - if before_pull_hashes[file_name] != after_pull_hashes[file_name]: - print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.") - exit(1) + # Check for differences in installation file hashes + for file_name in files_to_check: + if before_pull_hashes[file_name] != after_pull_hashes[file_name]: + print_big_message(f"File '{file_name}' was updated during 'git pull'. 
Please run the script again.") + exit(1) # Update PyTorch if not initial_installation: @@ -427,7 +428,7 @@ if __name__ == "__main__": update_requirements() elif choice == 'B': install_extensions_requirements() - update_requirements() + update_requirements(pull=False) elif choice == 'C': run_cmd("git reset --hard", assert_success=True, environment=True) elif choice == 'N': From f697cb4609b4950e9d5c994fec82458bbfe94f65 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:26:24 -0800 Subject: [PATCH 14/25] Move update_wizard_windows.sh to update_wizard_windows.bat (oops) --- modules/extensions.py | 2 +- one_click.py | 5 +---- update_wizard_windows.sh => update_wizard_windows.bat | 0 3 files changed, 2 insertions(+), 5 deletions(-) rename update_wizard_windows.sh => update_wizard_windows.bat (100%) diff --git a/modules/extensions.py b/modules/extensions.py index 69a977f2..6729b996 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -36,7 +36,7 @@ def load_extensions(): try: extension = importlib.import_module(f"extensions.{name}.script") except ModuleNotFoundError: - logger.error(f"Could not import the requirements for '{name}'. Make sure to install the requirements for the extension.\n\nLinux / Mac:\n\npip install -r extensions/{name}/requirements.txt --upgrade\n\nWindows:\n\npip install -r extensions\\{name}\\requirements.txt --upgrade\n\nIf you used the one-click installer, paste the command above in the terminal window opened after launching the cmd script for your OS.") + logger.error(f"Could not import the requirements for '{name}'. Make sure to install the requirements for the extension.\n\n* To install requirements for all available extensions, launch the\n update_wizard script for your OS and choose the B option.\n\n* To install the requirements for this extension alone, launch the\n cmd script for your OS and paste the following command in the\n terminal window that appears:\n\nLinux / Mac:\n\npip install -r extensions/{name}/requirements.txt --upgrade\n\nWindows:\n\npip install -r extensions\\{name}\\requirements.txt --upgrade\n") raise # Only run setup() and apply settings from settings.yaml once diff --git a/one_click.py b/one_click.py index 65b7acbd..65755a4f 100644 --- a/one_click.py +++ b/one_click.py @@ -259,7 +259,7 @@ def install_webui(): if "USE_CUDA118" in os.environ: use_cuda118 = "Y" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "N" else: - print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is\nvery old (Kepler or older).\n\nFor RTX and GTX series GPUs, say \"N\".\nIf unsure, say \"N\".\n") + print("\nDo you want to use CUDA 11.8 instead of 12.1?\nOnly choose this option if your GPU is very old (Kepler or older).\n\nFor RTX and GTX series GPUs, say \"N\".\nIf unsure, say \"N\".\n") use_cuda118 = input("Input (Y/N)> ").upper().strip('"\'').strip() while use_cuda118 not in 'YN': print("Invalid choice. 
Please try again.") @@ -438,9 +438,6 @@ if __name__ == "__main__": install_webui() os.chdir(script_dir) - script_name = "update_wizard_windows.bat" if is_windows() else ("update_wizard_linux.sh" if is_linux() else "update_wizard_macos.sh") - print_big_message(f"The installation is finished.\n\nIf you wish to install or update extensions requirements, you can\nrun the following script at any time: {script_name}.") - if os.environ.get("LAUNCH_AFTER_INSTALL", "").lower() in ("no", "n", "false", "0", "f", "off"): print_big_message("Will now exit due to LAUNCH_AFTER_INSTALL.") sys.exit() diff --git a/update_wizard_windows.sh b/update_wizard_windows.bat similarity index 100% rename from update_wizard_windows.sh rename to update_wizard_windows.bat From 907bda0d56b58c6f1d8d070af5ad30c8f4215fbc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:57:49 -0800 Subject: [PATCH 15/25] Move update_wizard_wsl.sh to update_wizard_wsl.bat --- update_wizard_wsl.sh => update_wizard_wsl.bat | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename update_wizard_wsl.sh => update_wizard_wsl.bat (100%) diff --git a/update_wizard_wsl.sh b/update_wizard_wsl.bat similarity index 100% rename from update_wizard_wsl.sh rename to update_wizard_wsl.bat From 3cfcab63a51910eadb5cd1e7b005db52496749e1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 20:37:44 -0800 Subject: [PATCH 16/25] Update an installation message --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 65755a4f..e2f8c485 100644 --- a/one_click.py +++ b/one_click.py @@ -309,7 +309,7 @@ def install_webui(): def install_extensions_requirements(): - print_big_message("Installing extensions requirements.\nSome of these may fail on Windows. Don\'t worry if you see error messages. 
They will not affect the main program.") + print_big_message("Installing extensions requirements.\nSome of these may fail on Windows.\nDon\'t worry if you see error messages, as they will not affect the main program.") extensions = [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))] for i, extension in enumerate(extensions): print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n") From 164ff2440d8a4b05db6671f00df22b83bb49436f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 4 Mar 2024 20:58:21 -0800 Subject: [PATCH 17/25] Use the correct PyTorch in the Colab notebook --- Colab-TextGen-GPU.ipynb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 53f60986..82e6c18e 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -66,6 +66,11 @@ " print(f\"TORCH: {torver}\")\n", " is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n", "\n", + " if is_cuda118:\n", + " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n", + " else:\n", + " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n", + "\n", " textgen_requirements = open('requirements.txt').read().splitlines()\n", " if is_cuda118:\n", " textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n", From 63a1d4afc8404cd2fa181797d7b461db6eb0769e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 5 Mar 2024 07:32:28 -0300 Subject: [PATCH 18/25] Bump gradio to 4.19 (#5522) --- css/main.css | 5 +++++ extensions/gallery/script.py | 2 +- extensions/whisper_stt/script.py | 2 +- js/switch_tabs.js | 8 ++++---- modules/block_requests.py | 3 ++- modules/gradio_hijack.py | 9 +++++++++ modules/shared.py | 1 - modules/text_generation.py | 14 +++----------- modules/ui.py | 1 - modules/ui_chat.py | 32 ++++++++++++++++---------------- modules/ui_default.py | 6 +++--- modules/ui_model_menu.py | 2 +- modules/ui_notebook.py | 6 +++--- modules/ui_parameters.py | 1 - modules/ui_session.py | 4 ++-- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- server.py | 15 ++++++++------- settings-template.yaml | 1 - 26 files changed, 67 insertions(+), 63 deletions(-) create mode 100644 modules/gradio_hijack.py diff --git a/css/main.css b/css/main.css index b41985d8..9681a5e3 100644 --- a/css/main.css +++ b/css/main.css @@ -89,6 +89,11 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { flex-wrap: nowrap; } +gradio-app > :first-child { + padding-left: var(--size-4) !important; + padding-right: var(--size-4) !important; +} + .header_bar { background-color: #f7f7f7; box-shadow: 0 2px 3px rgba(22 22 22 / 35%); diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 1cb7f27f..1bb8068a 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -119,7 +119,7 @@ def ui(): samples_per_page=settings["gallery-items_per_page"] ) - filter_box.change(lambda: None, None, None, _js=f'() => {{{custom_js()}; 
gotoFirstPage()}}').success( + filter_box.change(lambda: None, None, None, js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( filter_cards, filter_box, gallery).then( lambda x: gr.update(elem_classes='highlighted-border' if x != '' else ''), filter_box, filter_box, show_progress=False) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index cdc55687..efa58ce9 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -64,7 +64,7 @@ def ui(): audio.change( auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then( - None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}") + None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}") whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None) whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None) diff --git a/js/switch_tabs.js b/js/switch_tabs.js index 75d56367..0564f891 100644 --- a/js/switch_tabs.js +++ b/js/switch_tabs.js @@ -32,27 +32,27 @@ function switch_to_chat() { } function switch_to_default() { - let default_tab_button = main_parent.childNodes[0].childNodes[4]; + let default_tab_button = main_parent.childNodes[0].childNodes[5]; default_tab_button.click(); scrollToTop(); } function switch_to_notebook() { - let notebook_tab_button = main_parent.childNodes[0].childNodes[7]; + let notebook_tab_button = main_parent.childNodes[0].childNodes[9]; notebook_tab_button.click(); findButtonsByText("Raw")[1].click(); scrollToTop(); } function switch_to_generation_parameters() { - let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; + let parameters_tab_button = main_parent.childNodes[0].childNodes[13]; parameters_tab_button.click(); findButtonsByText("Generation")[0].click(); scrollToTop(); } function switch_to_character() { - let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; + let parameters_tab_button = main_parent.childNodes[0].childNodes[13]; parameters_tab_button.click(); findButtonsByText("Character")[0].click(); scrollToTop(); diff --git a/modules/block_requests.py b/modules/block_requests.py index fbc45de4..ac6c6800 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -43,8 +43,9 @@ def my_open(*args, **kwargs): with original_open(*args, **kwargs) as f: file_contents = f.read() - file_contents = file_contents.replace(b'\t\t', b'') + file_contents = file_contents.replace(b'\t\t', b'') file_contents = file_contents.replace(b'cdnjs.cloudflare.com', b'127.0.0.1') + return io.BytesIO(file_contents) else: return original_open(*args, **kwargs) diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py new file mode 100644 index 00000000..026f3d6c --- /dev/null +++ b/modules/gradio_hijack.py @@ -0,0 +1,9 @@ +import gradio as gr + + +def Box(*args, **kwargs): + return gr.Blocks(*args, **kwargs) + + +if not hasattr(gr, 'Box'): + gr.Box = Box diff --git a/modules/shared.py b/modules/shared.py index 7bef04bf..6dfc140c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,7 +46,6 @@ settings = { 'truncation_length_min': 0, 'truncation_length_max': 200000, 'max_tokens_second': 0, - 'max_updates_second': 0, 'prompt_lookup_num_tokens': 0, 'custom_stopping_strings': '', 'custom_token_bans': '', diff --git a/modules/text_generation.py b/modules/text_generation.py index 227d1822..60ba51fb 100644 --- 
a/modules/text_generation.py +++ b/modules/text_generation.py @@ -80,19 +80,16 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap state = copy.deepcopy(state) state['stream'] = True - min_update_interval = 0 - if state.get('max_updates_second', 0) > 0: - min_update_interval = 1 / state['max_updates_second'] - # Generate for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) + if is_stream: cur_time = time.time() - # Maximum number of tokens/second + # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: @@ -100,13 +97,8 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap last_update = time.time() yield reply - - # Limit updates to avoid lag in the Gradio UI - # API updates are not limited else: - if cur_time - last_update > min_update_interval: - last_update = cur_time - yield reply + yield reply if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 6249bb48..67613c53 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -113,7 +113,6 @@ def list_interface_input_elements(): 'max_new_tokens', 'auto_max_new_tokens', 'max_tokens_second', - 'max_updates_second', 'prompt_lookup_num_tokens', 'seed', 'temperature', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index ad4a4f0f..0990d233 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -175,7 +175,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -183,28 +183,28 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), 
gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -282,7 +282,7 @@ def create_event_handlers(): chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['character_menu'].change( chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success( @@ -290,7 +290,7 @@ def create_event_handlers(): chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( - lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') + lambda: None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') shared.gradio['mode'].change( lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( @@ -326,15 +326,15 @@ def create_event_handlers(): shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') + None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; 
switch_to_character()}}') shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) @@ -348,28 +348,28 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send_instruction_to_notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['send_instruction_to_negative_prompt'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') shared.gradio['send-chat-to-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send-chat-to-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') - shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_default.py b/modules/ui_default.py index 7db6f0d9..1f962551 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -67,21 +67,21 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => 
{{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index ac6a8a8f..ea053eef 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -75,7 +75,7 @@ def create_ui(): with gr.Row(): with gr.Column(): shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) - with gr.Box(): + with gr.Blocks(): with gr.Row(): with gr.Column(): with gr.Blocks(): diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 6bd5c919..a7c62baf 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -67,14 +67,14 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) @@ -83,7 +83,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 7aebe672..9bc05c58 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -73,7 +73,6 @@ def create_ui(default_preset): with gr.Column(): shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') - shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') diff --git a/modules/ui_session.py b/modules/ui_session.py index 989046ea..08929c33 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -32,10 +32,10 @@ def create_ui(): # Reset interface event shared.gradio['reset_interface'].click( set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( - lambda: None, None, None, _js='() => {document.body.innerHTML=\'

<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') + lambda: None, None, None, js='() => {document.body.innerHTML=\'

<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click( - lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( + lambda: None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')) shared.gradio['save_settings'].click( diff --git a/requirements.txt b/requirements.txt index 037fabf4..452a6929 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd.txt b/requirements_amd.txt index 2bece4fd..6c6e4bc9 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index c83b4ece..032dc73b 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index c1513b13..ab812991 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 4515f7b6..9789cead 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 6bbbba51..f48956c6 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 91a5db7a..80a728ca 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 6a840f50..71d11200 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 0319b6c3..d36ae6f7 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,7 +2,7 @@ accelerate==0.27.* colorama datasets einops -gradio==3.50.* +gradio==4.19.* hqq==0.1.3.post1 jinja2==3.1.2 lm_eval==0.3.0 diff --git a/server.py b/server.py index 681fe4e7..c6a01830 100644 --- a/server.py +++ b/server.py @@ -18,6 +18,7 @@ warnings.filterwarnings('ignore', category=UserWarning, message='The value passe warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_names" has conflict') with RequestBlocker(): + from modules import gradio_hijack import gradio as 
gr import matplotlib @@ -145,11 +146,9 @@ def create_interface(): ui_model_menu.create_event_handlers() # Interface launch events - if shared.settings['dark_theme']: - shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") - - shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") - shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['interface'].load(lambda: None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}") + shared.gradio['interface'].load(lambda: None, None, None, js=f"() => {{{js}}}") + shared.gradio['interface'].load(lambda x: None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) @@ -157,9 +156,10 @@ def create_interface(): extensions_module.create_extensions_block() # Extensions block # Launch the interface - shared.gradio['interface'].queue(concurrency_count=64) + shared.gradio['interface'].queue() with OpenMonkeyPatch(): shared.gradio['interface'].launch( + max_threads=64, prevent_thread_lock=True, share=shared.args.share, server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), @@ -168,7 +168,8 @@ def create_interface(): auth=auth or None, ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, - ssl_certfile=shared.args.ssl_certfile + ssl_certfile=shared.args.ssl_certfile, + allowed_paths=["."] ) diff --git a/settings-template.yaml b/settings-template.yaml index 87101116..095f25ec 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -15,7 +15,6 @@ truncation_length: 2048 truncation_length_min: 0 truncation_length_max: 200000 max_tokens_second: 0 -max_updates_second: 0 prompt_lookup_num_tokens: 0 custom_stopping_strings: '' custom_token_bans: '' From 1437f757a1f4b4e2dcbd419e29dce6e150b82fbd Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 5 Mar 2024 02:33:51 -0800 Subject: [PATCH 19/25] Bump HQQ to 0.1.5 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 452a6929..f16950df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_amd.txt b/requirements_amd.txt index 6c6e4bc9..40fc7541 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 032dc73b..b1715176 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt 
@@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index ab812991..3f76c481 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 9789cead..96ca04e9 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index f48956c6..49f37936 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 80a728ca..62b6721f 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 71d11200..8950071e 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index d36ae6f7..2f0a1022 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -3,7 +3,7 @@ colorama datasets einops gradio==4.19.* -hqq==0.1.3.post1 +hqq==0.1.5 jinja2==3.1.2 lm_eval==0.3.0 markdown From 72a498ddd44a895205e53b5696742dc4ded9e12e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Mar 2024 07:34:32 -0300 Subject: [PATCH 20/25] Update peft requirement from ==0.8.* to ==0.9.* (#5626) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 452a6929..23064816 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_amd.txt b/requirements_amd.txt index 6c6e4bc9..59500b3d 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 032dc73b..53281f59 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index ab812991..3f870857 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas 
-peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 9789cead..ddc8bc9b 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index f48956c6..41af6c21 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 80a728ca..642b141f 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 71d11200..45a0e78b 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index d36ae6f7..99276b4a 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.8.* +peft==0.9.* Pillow>=9.5.0 pyyaml requests From d61e31e1827104f31115024bb82a82553220d985 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 5 Mar 2024 07:54:34 -0300 Subject: [PATCH 21/25] Save the extensions after Gradio 4 (#5632) --- modules/gradio_hijack.py | 71 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py index 026f3d6c..2ddd983a 100644 --- a/modules/gradio_hijack.py +++ b/modules/gradio_hijack.py @@ -1,9 +1,72 @@ +''' +Copied from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184 +''' + +import inspect +import warnings +from functools import wraps + import gradio as gr -def Box(*args, **kwargs): - return gr.Blocks(*args, **kwargs) +class GradioDeprecationWarning(DeprecationWarning): + pass -if not hasattr(gr, 'Box'): - gr.Box = Box +def repair(grclass): + if not getattr(grclass, 'EVENTS', None): + return + + @wraps(grclass.__init__) + def __repaired_init__(self, *args, tooltip=None, source=None, original=grclass.__init__, **kwargs): + if source: + kwargs["sources"] = [source] + + allowed_kwargs = inspect.signature(original).parameters + fixed_kwargs = {} + for k, v in kwargs.items(): + if k in allowed_kwargs: + fixed_kwargs[k] = v + else: + warnings.warn(f"unexpected argument for {grclass.__name__}: {k}", GradioDeprecationWarning, stacklevel=2) + + original(self, *args, **fixed_kwargs) + + self.webui_tooltip = tooltip + + for event in self.EVENTS: + replaced_event = getattr(self, str(event)) + + def fun(*xargs, _js=None, replaced_event=replaced_event, **xkwargs): + if _js: + xkwargs['js'] = _js + + return replaced_event(*xargs, **xkwargs) + + setattr(self, str(event), fun) + + grclass.__init__ = __repaired_init__ + grclass.update = gr.update + + +for component in set(gr.components.__all__ + gr.layouts.__all__): + repair(getattr(gr, component, None)) + + +class Dependency(gr.events.Dependency): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + 
def then(*xargs, _js=None, **xkwargs): + if _js: + xkwargs['js'] = _js + + return original_then(*xargs, **xkwargs) + + original_then = self.then + self.then = then + + +gr.events.Dependency = Dependency + +gr.Box = gr.Group From 03f03af535e006e6de0478cf1c0f80e3c382f0ec Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 5 Mar 2024 02:56:37 -0800 Subject: [PATCH 22/25] Revert "Update peft requirement from ==0.8.* to ==0.9.* (#5626)" This reverts commit 72a498ddd44a895205e53b5696742dc4ded9e12e. --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 991be48a..f16950df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_amd.txt b/requirements_amd.txt index 843c75c6..40fc7541 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 4b477e68..b1715176 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 67d4a5c2..3f76c481 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index dbcd78c9..96ca04e9 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index c28615c1..49f37936 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 2f789625..62b6721f 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 65e94f1e..8950071e 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 1bc59616..2f0a1022 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -10,7 +10,7 @@ markdown numpy==1.26.* optimum==1.17.* pandas -peft==0.9.* +peft==0.8.* Pillow>=9.5.0 pyyaml requests From 7eee9e947067ee03037c9b028b4e4bc89ea8f2e0 Mon Sep 17 00:00:00 2001 From: oobabooga 
<112222186+oobabooga@users.noreply.github.com> Date: Wed, 6 Mar 2024 06:46:50 -0800 Subject: [PATCH 23/25] Add -k to curl command to download miniconda on windows (closes #5628) --- start_windows.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/start_windows.bat b/start_windows.bat index f607e518..900ae7a4 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -37,7 +37,7 @@ if "%conda_exists%" == "F" ( echo Downloading Miniconda from %MINICONDA_DOWNLOAD_URL% to %INSTALL_DIR%\miniconda_installer.exe mkdir "%INSTALL_DIR%" - call curl -L "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end ) + call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end ) echo Installing Miniconda to %CONDA_ROOT_PREFIX% start /wait "" "%INSTALL_DIR%\miniconda_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX% From 21749583624c2609c716d7ee5cfe0f6fbb83f06b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:52:46 -0300 Subject: [PATCH 24/25] Revert gradio to 3.50.2 (#5640) --- css/main.css | 5 --- extensions/gallery/script.py | 2 +- extensions/whisper_stt/script.py | 2 +- js/switch_tabs.js | 8 ++-- modules/block_requests.py | 3 +- modules/gradio_hijack.py | 72 -------------------------------- modules/shared.py | 1 + modules/text_generation.py | 14 +++++-- modules/ui.py | 1 + modules/ui_chat.py | 32 +++++++------- modules/ui_default.py | 6 +-- modules/ui_model_menu.py | 2 +- modules/ui_notebook.py | 6 +-- modules/ui_parameters.py | 1 + modules/ui_session.py | 4 +- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- server.py | 15 ++++--- settings-template.yaml | 1 + 26 files changed, 63 insertions(+), 130 deletions(-) delete mode 100644 modules/gradio_hijack.py diff --git a/css/main.css b/css/main.css index 9681a5e3..b41985d8 100644 --- a/css/main.css +++ b/css/main.css @@ -89,11 +89,6 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { flex-wrap: nowrap; } -gradio-app > :first-child { - padding-left: var(--size-4) !important; - padding-right: var(--size-4) !important; -} - .header_bar { background-color: #f7f7f7; box-shadow: 0 2px 3px rgba(22 22 22 / 35%); diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 1bb8068a..1cb7f27f 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -119,7 +119,7 @@ def ui(): samples_per_page=settings["gallery-items_per_page"] ) - filter_box.change(lambda: None, None, None, js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( + filter_box.change(lambda: None, None, None, _js=f'() => {{{custom_js()}; gotoFirstPage()}}').success( filter_cards, filter_box, gallery).then( lambda x: gr.update(elem_classes='highlighted-border' if x != '' else ''), filter_box, filter_box, show_progress=False) diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index efa58ce9..cdc55687 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -64,7 +64,7 @@ def ui(): audio.change( auto_transcribe, [audio, auto_submit, 
whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then( - None, auto_submit, None, js="(check) => {if (check) { document.getElementById('Generate').click() }}") + None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}") whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None) whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None) diff --git a/js/switch_tabs.js b/js/switch_tabs.js index 0564f891..75d56367 100644 --- a/js/switch_tabs.js +++ b/js/switch_tabs.js @@ -32,27 +32,27 @@ function switch_to_chat() { } function switch_to_default() { - let default_tab_button = main_parent.childNodes[0].childNodes[5]; + let default_tab_button = main_parent.childNodes[0].childNodes[4]; default_tab_button.click(); scrollToTop(); } function switch_to_notebook() { - let notebook_tab_button = main_parent.childNodes[0].childNodes[9]; + let notebook_tab_button = main_parent.childNodes[0].childNodes[7]; notebook_tab_button.click(); findButtonsByText("Raw")[1].click(); scrollToTop(); } function switch_to_generation_parameters() { - let parameters_tab_button = main_parent.childNodes[0].childNodes[13]; + let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; parameters_tab_button.click(); findButtonsByText("Generation")[0].click(); scrollToTop(); } function switch_to_character() { - let parameters_tab_button = main_parent.childNodes[0].childNodes[13]; + let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; parameters_tab_button.click(); findButtonsByText("Character")[0].click(); scrollToTop(); diff --git a/modules/block_requests.py b/modules/block_requests.py index ac6c6800..fbc45de4 100644 --- a/modules/block_requests.py +++ b/modules/block_requests.py @@ -43,9 +43,8 @@ def my_open(*args, **kwargs): with original_open(*args, **kwargs) as f: file_contents = f.read() - file_contents = file_contents.replace(b'\t\t', b'') + file_contents = file_contents.replace(b'\t\t', b'') file_contents = file_contents.replace(b'cdnjs.cloudflare.com', b'127.0.0.1') - return io.BytesIO(file_contents) else: return original_open(*args, **kwargs) diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py deleted file mode 100644 index 2ddd983a..00000000 --- a/modules/gradio_hijack.py +++ /dev/null @@ -1,72 +0,0 @@ -''' -Copied from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184 -''' - -import inspect -import warnings -from functools import wraps - -import gradio as gr - - -class GradioDeprecationWarning(DeprecationWarning): - pass - - -def repair(grclass): - if not getattr(grclass, 'EVENTS', None): - return - - @wraps(grclass.__init__) - def __repaired_init__(self, *args, tooltip=None, source=None, original=grclass.__init__, **kwargs): - if source: - kwargs["sources"] = [source] - - allowed_kwargs = inspect.signature(original).parameters - fixed_kwargs = {} - for k, v in kwargs.items(): - if k in allowed_kwargs: - fixed_kwargs[k] = v - else: - warnings.warn(f"unexpected argument for {grclass.__name__}: {k}", GradioDeprecationWarning, stacklevel=2) - - original(self, *args, **fixed_kwargs) - - self.webui_tooltip = tooltip - - for event in self.EVENTS: - replaced_event = getattr(self, str(event)) - - def fun(*xargs, _js=None, replaced_event=replaced_event, **xkwargs): - if _js: - xkwargs['js'] = _js - - return replaced_event(*xargs, **xkwargs) - - setattr(self, str(event), fun) - - grclass.__init__ = __repaired_init__ - grclass.update = 
gr.update - - -for component in set(gr.components.__all__ + gr.layouts.__all__): - repair(getattr(gr, component, None)) - - -class Dependency(gr.events.Dependency): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def then(*xargs, _js=None, **xkwargs): - if _js: - xkwargs['js'] = _js - - return original_then(*xargs, **xkwargs) - - original_then = self.then - self.then = then - - -gr.events.Dependency = Dependency - -gr.Box = gr.Group diff --git a/modules/shared.py b/modules/shared.py index 6dfc140c..7bef04bf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,6 +46,7 @@ settings = { 'truncation_length_min': 0, 'truncation_length_max': 200000, 'max_tokens_second': 0, + 'max_updates_second': 0, 'prompt_lookup_num_tokens': 0, 'custom_stopping_strings': '', 'custom_token_bans': '', diff --git a/modules/text_generation.py b/modules/text_generation.py index 60ba51fb..227d1822 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -80,16 +80,19 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap state = copy.deepcopy(state) state['stream'] = True + min_update_interval = 0 + if state.get('max_updates_second', 0) > 0: + min_update_interval = 1 / state['max_updates_second'] + # Generate for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) - if is_stream: cur_time = time.time() - # Limit number of tokens/second to make text readable in real time + # Maximum number of tokens/second if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: @@ -97,8 +100,13 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap last_update = time.time() yield reply + + # Limit updates to avoid lag in the Gradio UI + # API updates are not limited else: - yield reply + if cur_time - last_update > min_update_interval: + last_update = cur_time + yield reply if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 67613c53..6249bb48 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -113,6 +113,7 @@ def list_interface_input_elements(): 'max_new_tokens', 'auto_max_new_tokens', 'max_tokens_second', + 'max_updates_second', 'prompt_lookup_num_tokens', 'seed', 'temperature', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 0990d233..ad4a4f0f 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -175,7 +175,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -183,28 +183,28 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), 
None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -282,7 +282,7 @@ def create_event_handlers(): chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['character_menu'].change( chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success( @@ -290,7 +290,7 @@ def create_event_handlers(): chat.load_latest_history, gradio('interface_state'), gradio('history')).then( chat.redraw_html, gradio(reload_arr), gradio('display')).then( lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then( - lambda: None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') + lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') shared.gradio['mode'].change( lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( @@ -326,15 +326,15 @@ def create_event_handlers(): shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, 
gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') + None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) @@ -348,28 +348,28 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send_instruction_to_notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['send_instruction_to_negative_prompt'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then( partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') shared.gradio['send-chat-to-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then( - lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') shared.gradio['send-chat-to-notebook'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then( - lambda: None, None, None, js=f'() => 
{{{ui.switch_tabs_js}; switch_to_notebook()}}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') - shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_default.py b/modules/ui_default.py index 1f962551..7db6f0d9 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -67,21 +67,21 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-default'].submit( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index ea053eef..ac6a8a8f 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -75,7 +75,7 @@ def create_ui(): with gr.Row(): with gr.Column(): shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) - with gr.Blocks(): + with gr.Box(): with gr.Row(): with gr.Column(): with gr.Blocks(): diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index a7c62baf..6bd5c919 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -67,14 +67,14 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}') + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( 
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index a7c62baf..6bd5c919 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -67,14 +67,14 @@ def create_event_handlers():
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
+        lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['textbox-notebook'].submit(
         lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
+        lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False)
     shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)
@@ -83,7 +83,7 @@ def create_event_handlers():
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        lambda: None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
+        lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False)
     shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False)
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 9bc05c58..7aebe672 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -73,6 +73,7 @@ def create_ui(default_preset):
             with gr.Column():
                 shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
                 shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
+                shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
                 shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
                 shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
diff --git a/modules/ui_session.py b/modules/ui_session.py
index 08929c33..989046ea 100644
--- a/modules/ui_session.py
+++ b/modules/ui_session.py
@@ -32,10 +32,10 @@ def create_ui():
     # Reset interface event
     shared.gradio['reset_interface'].click(
         set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(
-        lambda: None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
+        lambda: None, None, None, _js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
 
     shared.gradio['toggle_dark_mode'].click(
-        lambda: None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then(
+        lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then(
         lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state'))
 
     shared.gradio['save_settings'].click(
diff --git a/requirements.txt b/requirements.txt
index f16950df..fdf8c914 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 40fc7541..7255cf2f 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index b1715176..b949aa30 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 3f76c481..88d578ad 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 96ca04e9..13767480 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 49f37936..92b5b969 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 62b6721f..01964fde 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 8950071e..f9a62a22 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 2f0a1022..7166015d 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.19.*
+gradio==3.50.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
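Every requirements variant above moves in lockstep from gradio==4.19.* back to 3.50.*, so after reinstalling it is worth confirming that the active environment actually matches the pin: a leftover 4.x install would reject every `_js` keyword patched earlier in this series. A quick sanity check (a generic sketch, not part of the repo):

    import gradio

    # Gradio 4.x renamed _js to js and dropped gr.Box, so a stale 4.x install
    # fails at interface build time with unexpected-keyword/attribute errors.
    assert gradio.__version__.startswith("3.50."), gradio.__version__
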
diff --git a/server.py b/server.py
index c6a01830..681fe4e7 100644
--- a/server.py
+++ b/server.py
@@ -18,7 +18,6 @@ warnings.filterwarnings('ignore', category=UserWarning, message='The value passe
 warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_names" has conflict')
 
 with RequestBlocker():
-    from modules import gradio_hijack
     import gradio as gr
 
 import matplotlib
@@ -146,9 +145,11 @@ def create_interface():
     ui_model_menu.create_event_handlers()
 
     # Interface launch events
-    shared.gradio['interface'].load(lambda: None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}")
-    shared.gradio['interface'].load(lambda: None, None, None, js=f"() => {{{js}}}")
-    shared.gradio['interface'].load(lambda x: None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
+    if shared.settings['dark_theme']:
+        shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')")
+
+    shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}")
+    shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
     shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False)
     shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
 
@@ -156,10 +157,9 @@ def create_interface():
     extensions_module.create_extensions_block()  # Extensions block
 
     # Launch the interface
-    shared.gradio['interface'].queue()
+    shared.gradio['interface'].queue(concurrency_count=64)
     with OpenMonkeyPatch():
         shared.gradio['interface'].launch(
-            max_threads=64,
             prevent_thread_lock=True,
             share=shared.args.share,
             server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'),
@@ -168,8 +168,7 @@ def create_interface():
             auth=auth or None,
             ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True,
             ssl_keyfile=shared.args.ssl_keyfile,
-            ssl_certfile=shared.args.ssl_certfile,
-            allowed_paths=["."]
+            ssl_certfile=shared.args.ssl_certfile
         )
 
diff --git a/settings-template.yaml b/settings-template.yaml
index 095f25ec..87101116 100644
--- a/settings-template.yaml
+++ b/settings-template.yaml
@@ -15,6 +15,7 @@ truncation_length: 2048
 truncation_length_min: 0
 truncation_length_max: 200000
 max_tokens_second: 0
+max_updates_second: 0
 prompt_lookup_num_tokens: 0
 custom_stopping_strings: ''
 custom_token_bans: ''
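The new max_updates_second setting (the slider added in ui_parameters.py, default 0 in settings-template.yaml above) caps how often streamed text is pushed to the browser, independently of max_tokens_second, which caps generation itself. The idea is plain time-based throttling of a token stream; a rough sketch under the same semantics (0 means unthrottled; this is an illustration, not the repo's implementation):

    import time

    def throttle(chunks, max_updates_second=0):
        """Yield items from a stream at most max_updates_second times per second."""
        min_interval = 1 / max_updates_second if max_updates_second > 0 else 0
        last_emit = 0.0
        pending = None
        for pending in chunks:
            now = time.monotonic()
            if now - last_emit >= min_interval:
                yield pending
                last_emit = now
                pending = None
        if pending is not None:
            yield pending  # always flush the final state
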
From fcc92caa309ed6d39af398926aba58164dfb439d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 6 Mar 2024 07:36:23 -0800
Subject: [PATCH 25/25] Installer: add option to install requirements for just
 one extension

---
 one_click.py | 65 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/one_click.py b/one_click.py
index e2f8c485..c64ac460 100644
--- a/one_click.py
+++ b/one_click.py
@@ -200,6 +200,16 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False,
     return result
 
 
+def generate_alphabetic_sequence(index):
+    result = ''
+    while index >= 0:
+        index, remainder = divmod(index, 26)
+        result = chr(ord('A') + remainder) + result
+        index -= 1
+
+    return result
+
+
 def get_user_choice(question, options_dict):
     print()
     print(question)
@@ -308,9 +318,13 @@ def install_webui():
     update_requirements(initial_installation=True)
 
 
+def get_extensions_names():
+    return [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))]
+
+
 def install_extensions_requirements():
     print_big_message("Installing extensions requirements.\nSome of these may fail on Windows.\nDon\'t worry if you see error messages, as they will not affect the main program.")
-    extensions = [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))]
+    extensions = get_extensions_names()
     for i, extension in enumerate(extensions):
         print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n")
         extension_req_path = os.path.join("extensions", extension, "requirements.txt")
@@ -414,25 +428,38 @@ if __name__ == "__main__":
     args, _ = parser.parse_known_args()
 
     if args.update_wizard:
-        choice = get_user_choice(
-            "What would you like to do?",
-            {
-                'A': 'Update the web UI',
-                'B': 'Install/update extensions requirements',
-                'C': 'Revert local changes to repository files with \"git reset --hard\"',
-                'N': 'Nothing (exit)'
-            },
-        )
+        while True:
+            choice = get_user_choice(
+                "What would you like to do?",
+                {
+                    'A': 'Update the web UI',
+                    'B': 'Install/update extensions requirements',
+                    'C': 'Revert local changes to repository files with \"git reset --hard\"',
+                    'N': 'Nothing (exit)'
+                },
+            )
 
-        if choice == 'A':
-            update_requirements()
-        elif choice == 'B':
-            install_extensions_requirements()
-            update_requirements(pull=False)
-        elif choice == 'C':
-            run_cmd("git reset --hard", assert_success=True, environment=True)
-        elif choice == 'N':
-            sys.exit()
+            if choice == 'A':
+                update_requirements()
+            elif choice == 'B':
+                choices = {'A': 'All extensions'}
+                for i, name in enumerate(get_extensions_names()):
+                    key = generate_alphabetic_sequence(i + 1)
+                    choices[key] = name
+
+                choice = get_user_choice("What extension?", choices)
+
+                if choice == 'A':
+                    install_extensions_requirements()
+                else:
+                    extension_req_path = os.path.join("extensions", choices[choice], "requirements.txt")
+                    run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True)
+
+                update_requirements(pull=False)
+            elif choice == 'C':
+                run_cmd("git reset --hard", assert_success=True, environment=True)
+            elif choice == 'N':
+                sys.exit()
     else:
         if not is_installed():
             install_webui()
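generate_alphabetic_sequence is a bijective base-26 encoder for the wizard's menu keys: indices map to A..Z, then AA, AB, and so on, so the extension menu never runs out of single letters. Because 'A' is reserved for the 'All extensions' entry, extensions are keyed from generate_alphabetic_sequence(i + 1), i.e. starting at 'B'. A few spot checks against the function exactly as defined in the patch:

    assert generate_alphabetic_sequence(0) == 'A'
    assert generate_alphabetic_sequence(25) == 'Z'
    assert generate_alphabetic_sequence(26) == 'AA'
    assert generate_alphabetic_sequence(51) == 'AZ'
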