From c32f06d62f52dc59f57203a8af088201b5891029 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 17 Jan 2025 07:03:22 -0800
Subject: [PATCH 01/29] Update README
---
.github/FUNDING.yml | 3 +--
README.md | 6 +++++-
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
index 7fd11138..e2e16212 100644
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,2 +1 @@
-github: oobabooga
-ko_fi: oobabooga
+patreon: oobabooga
diff --git a/README.md b/README.md
index 7e2dec95..4b22f8d5 100644
--- a/README.md
+++ b/README.md
@@ -359,7 +359,7 @@ text-generation-webui
└── llama-2-13b-chat.Q4_K_M.gguf
```
-* The remaining model types (like 16-bit transformers models and GPTQ models) are made of several files and must be placed in a subfolder. Example:
+* The remaining model types (like 16-bit Transformers models and EXL2 models) are made of several files and must be placed in a subfolder. Example:
```
text-generation-webui
@@ -400,3 +400,7 @@ https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/ma
## Acknowledgment
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
+
+## ⭐ Featured Patreon Supporters
+
+* [Become the first one!](https://www.patreon.com/oobabooga)
From f8a5b0bc43ee7dcdb9c3a43f78ddf462daaac59e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 20 Jan 2025 17:41:03 -0300
Subject: [PATCH 02/29] Update accelerate requirement from ==1.2.* to ==1.3.*
(#6683)
---
requirements.txt | 2 +-
requirements_amd.txt | 2 +-
requirements_amd_noavx2.txt | 2 +-
requirements_apple_intel.txt | 2 +-
requirements_apple_silicon.txt | 2 +-
requirements_cpu_only.txt | 2 +-
requirements_cpu_only_noavx2.txt | 2 +-
requirements_noavx2.txt | 2 +-
requirements_nowheels.txt | 2 +-
9 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 18872431..cb4a93ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
bitsandbytes==0.45.*
colorama
datasets
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 87ee93d1..13616a92 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index fa2f5ca7..b1fa3957 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index e9838295..5c62e0b7 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index bef02feb..93ead215 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 32f1a50a..f0db2016 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 938848bf..80d0f039 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 69e497e0..b3a1423b 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
bitsandbytes==0.45.*
colorama
datasets
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index a034ee61..3d6c922f 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
From ecb5d3c48545a9d3ad41cd34bd77767e93f6ed3b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 08:35:35 -0800
Subject: [PATCH 03/29] Installer: do not redownload wheels for each update
---
one_click.py | 67 +++++++++++++++++++++++++++++-----------------------
1 file changed, 38 insertions(+), 29 deletions(-)
diff --git a/one_click.py b/one_click.py
index e78a2450..4910f8c7 100644
--- a/one_click.py
+++ b/one_click.py
@@ -101,7 +101,7 @@ def torch_version():
def update_pytorch():
- print_big_message("Checking for PyTorch updates")
+ print_big_message("Checking for PyTorch updates.")
torver = torch_version()
is_cuda = '+cu' in torver
@@ -343,6 +343,31 @@ def update_requirements(initial_installation=False, pull=True):
git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main'
run_cmd(git_creation_cmd, environment=True, assert_success=True)
+ # Detect the requirements file from the PyTorch version
+ torver = torch_version()
+ is_cuda = '+cu' in torver
+ is_cuda118 = '+cu118' in torver # 2.1.0+cu118
+ is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
+ is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
+ is_cpu = '+cpu' in torver # 2.0.1+cpu
+
+ if is_rocm:
+ base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ elif is_cpu or is_intel:
+ base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ elif is_macos():
+ base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
+ else:
+ base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+
+ requirements_file = base_requirements
+
+ # Call git pull
+ before_pull_whl_lines = []
+ if os.path.exists(requirements_file):
+ with open(requirements_file, 'r') as f:
+ before_pull_whl_lines = [line for line in f if '.whl' in line]
+
if pull:
print_big_message("Updating the local copy of the repository with \"git pull\"")
@@ -362,6 +387,11 @@ def update_requirements(initial_installation=False, pull=True):
print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.")
exit(1)
+ after_pull_whl_lines = []
+ if os.path.exists(requirements_file):
+ with open(requirements_file, 'r') as f:
+ after_pull_whl_lines = [line for line in f if '.whl' in line]
+
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
@@ -369,30 +399,16 @@ def update_requirements(initial_installation=False, pull=True):
if not initial_installation:
update_pytorch()
- # Detect the PyTorch version
- torver = torch_version()
- is_cuda = '+cu' in torver
- is_cuda118 = '+cu118' in torver # 2.1.0+cu118
- is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
- is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
- is_cpu = '+cpu' in torver # 2.0.1+cpu
-
- if is_rocm:
- base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
- elif is_cpu or is_intel:
- base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
- elif is_macos():
- base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
- else:
- base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-
- requirements_file = base_requirements
-
print_big_message(f"Installing webui requirements from file: {requirements_file}")
print(f"TORCH: {torver}\n")
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
+
+ whl_changed = before_pull_whl_lines != after_pull_whl_lines
+ if not initial_installation and not whl_changed:
+ textgen_requirements = [line for line in textgen_requirements if not '.whl' in line]
+
if is_cuda118:
textgen_requirements = [
req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
@@ -416,16 +432,9 @@ def update_requirements(initial_installation=False, pull=True):
# Install/update the project requirements
run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True)
+
+ # Clean up
os.remove('temp_requirements.txt')
-
- # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility
- if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1:
- clear_cache()
- return
-
- if not os.path.exists("repositories/"):
- os.mkdir("repositories")
-
clear_cache()
From 5e99dded4e9bae4501127c0bb905a6d27711722d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 09:05:44 -0800
Subject: [PATCH 04/29] UI: add "Continue" and "Remove" buttons below the last
chat message
---
css/main.css | 10 ++++++++++
js/global_scope_js.js | 8 ++++++++
modules/html_generator.py | 15 +++++++++++++--
3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/css/main.css b/css/main.css
index 1a7efe70..b10d1980 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1259,6 +1259,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
left: 25px;
}
+.footer-button.footer-continue-button {
+ bottom: -23px;
+ left: 50px;
+}
+
+.footer-button.footer-remove-button {
+ bottom: -23px;
+ left: 75px;
+}
+
.message:hover .footer-button,
.user-message:hover .footer-button,
.assistant-message:hover .footer-button {
diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 6bf0f0e3..f308edb9 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -22,6 +22,14 @@ function regenerateClick() {
document.getElementById("Regenerate").click();
}
+function continueClick() {
+ document.getElementById("Continue").click();
+}
+
+function removeLastClick() {
+ document.getElementById("Remove-last").click();
+}
+
function handleMorphdomUpdate(text) {
morphdom(
document.getElementById("chat").parentNode,
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 29973412..c836f663 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -304,8 +304,13 @@ def get_image_cache(path):
copy_svg = ''''''
refresh_svg = ''''''
-copy_button = f''
-refresh_button = f''
+continue_svg = ''''''
+remove_svg = ''''''
+
+copy_button = f''
+refresh_button = f''
+continue_button = f''
+remove_button = f''
def generate_instruct_html(history):
@@ -334,6 +339,8 @@ def generate_instruct_html(history):
f'
{converted_visible[1]}
'
f'{copy_button}'
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+ f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+ f'{remove_button if i == len(history["visible"]) - 1 else ""}'
f''
f''
)
@@ -383,6 +390,8 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
f'{converted_visible[1]}
'
f'{copy_button}'
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+ f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+ f'{remove_button if i == len(history["visible"]) - 1 else ""}'
f''
f''
)
@@ -417,6 +426,8 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
f'{converted_visible[1]}
'
f'{copy_button}'
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+ f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+ f'{remove_button if i == len(history["visible"]) - 1 else ""}'
f''
f''
)
From 2bf8788c3036f4e83677f41c225af4fa868d9b7a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 09:31:06 -0800
Subject: [PATCH 05/29] Installer: Fix a bug after
ecb5d3c48545a9d3ad41cd34bd77767e93f6ed3b
---
.gitignore | 1 +
one_click.py | 34 +++++++++++++++++++++++-----------
2 files changed, 24 insertions(+), 11 deletions(-)
diff --git a/.gitignore b/.gitignore
index ca307c4a..7d1099b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ venv
.direnv
.vs
.vscode
+.wheels_changed_flag
*.bak
*.ipynb
*.log
diff --git a/one_click.py b/one_click.py
index 4910f8c7..04a488a0 100644
--- a/one_click.py
+++ b/one_click.py
@@ -362,13 +362,17 @@ def update_requirements(initial_installation=False, pull=True):
requirements_file = base_requirements
- # Call git pull
- before_pull_whl_lines = []
- if os.path.exists(requirements_file):
- with open(requirements_file, 'r') as f:
- before_pull_whl_lines = [line for line in f if '.whl' in line]
+ # Call git pull, while checking if .whl requirements have changed
+ wheels_changed_from_flag = False
+ if os.path.exists('.wheels_changed_flag'):
+ os.remove('.wheels_changed_flag')
+ wheels_changed_from_flag = True
if pull:
+ if os.path.exists(requirements_file):
+ with open(requirements_file, 'r') as f:
+ before_pull_whl_lines = [line for line in f if '.whl' in line]
+
print_big_message("Updating the local copy of the repository with \"git pull\"")
files_to_check = [
@@ -381,16 +385,25 @@ def update_requirements(initial_installation=False, pull=True):
run_cmd("git pull --autostash", assert_success=True, environment=True)
after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+ if os.path.exists(requirements_file):
+ with open(requirements_file, 'r') as f:
+ after_pull_whl_lines = [line for line in f if '.whl' in line]
+
# Check for differences in installation file hashes
for file_name in files_to_check:
if before_pull_hashes[file_name] != after_pull_hashes[file_name]:
print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.")
+
+ # Check if wheels changed during this pull
+ wheels_changed = before_pull_whl_lines != after_pull_whl_lines
+ if wheels_changed:
+ open('.wheels_changed_flag', 'w').close()
+
exit(1)
- after_pull_whl_lines = []
- if os.path.exists(requirements_file):
- with open(requirements_file, 'r') as f:
- after_pull_whl_lines = [line for line in f if '.whl' in line]
+ wheels_changed = wheels_changed_from_flag
+ if pull:
+ wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
@@ -405,8 +418,7 @@ def update_requirements(initial_installation=False, pull=True):
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
- whl_changed = before_pull_whl_lines != after_pull_whl_lines
- if not initial_installation and not whl_changed:
+ if not initial_installation and not wheels_changed:
textgen_requirements = [line for line in textgen_requirements if not '.whl' in line]
if is_cuda118:
From ff250dd800e122e3d40a6f0182abafea9bf7da83 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 09:58:13 -0800
Subject: [PATCH 06/29] Installer: simplify the script
---
one_click.py | 107 ++++++++++++++++++++++-----------------------------
1 file changed, 47 insertions(+), 60 deletions(-)
diff --git a/one_click.py b/one_click.py
index 04a488a0..b0a77b72 100644
--- a/one_click.py
+++ b/one_click.py
@@ -102,31 +102,24 @@ def torch_version():
def update_pytorch():
print_big_message("Checking for PyTorch updates.")
-
torver = torch_version()
- is_cuda = '+cu' in torver
- is_cuda118 = '+cu118' in torver # 2.1.0+cu118
- is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
- is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
- is_cpu = '+cpu' in torver # 2.0.1+cpu
+ base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
- install_pytorch = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
+ if "+cu118" in torver:
+ install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu118"
+ elif "+cu" in torver:
+ install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121"
+ elif "+rocm" in torver:
+ install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1"
+ elif "+cpu" in torver:
+ install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
+ elif "+cxx11" in torver:
+ intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
+ install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+ else:
+ install_cmd = base_cmd
- if is_cuda118:
- install_pytorch += "--index-url https://download.pytorch.org/whl/cu118"
- elif is_cuda:
- install_pytorch += "--index-url https://download.pytorch.org/whl/cu121"
- elif is_rocm:
- install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1"
- elif is_cpu:
- install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
- elif is_intel:
- if is_linux():
- install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
- else:
- install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-
- run_cmd(f"{install_pytorch}", assert_success=True, environment=True)
+ run_cmd(install_cmd, assert_success=True, environment=True)
def is_installed():
@@ -340,69 +333,63 @@ def install_extensions_requirements():
def update_requirements(initial_installation=False, pull=True):
# Create .git directory if missing
if not os.path.exists(os.path.join(script_dir, ".git")):
- git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main'
- run_cmd(git_creation_cmd, environment=True, assert_success=True)
+ run_cmd(
+ "git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && "
+ "git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && "
+ "git reset --hard origin/main && git branch --set-upstream-to=origin/main",
+ environment=True,
+ assert_success=True
+ )
- # Detect the requirements file from the PyTorch version
torver = torch_version()
- is_cuda = '+cu' in torver
- is_cuda118 = '+cu118' in torver # 2.1.0+cu118
- is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
- is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
- is_cpu = '+cpu' in torver # 2.0.1+cpu
-
- if is_rocm:
- base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
- elif is_cpu or is_intel:
- base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ if "+rocm" in torver:
+ requirements_file = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ elif "+cpu" in torver or "+cxx11" in torver:
+ requirements_file = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
elif is_macos():
- base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
+ requirements_file = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
else:
- base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ requirements_file = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
- requirements_file = base_requirements
-
- # Call git pull, while checking if .whl requirements have changed
- wheels_changed_from_flag = False
- if os.path.exists('.wheels_changed_flag'):
+ # Check and clear the wheels changed flag
+ wheels_changed = os.path.exists('.wheels_changed_flag')
+ if wheels_changed:
os.remove('.wheels_changed_flag')
- wheels_changed_from_flag = True
if pull:
+ # Read .whl lines before pulling
+ before_pull_whl_lines = []
if os.path.exists(requirements_file):
with open(requirements_file, 'r') as f:
before_pull_whl_lines = [line for line in f if '.whl' in line]
- print_big_message("Updating the local copy of the repository with \"git pull\"")
+ print_big_message('Updating the local copy of the repository with "git pull"')
+ # Hash files before pulling
files_to_check = [
'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat',
'update_wizard_linux.sh', 'update_wizard_macos.sh', 'update_wizard_windows.bat', 'update_wizard_wsl.bat',
'one_click.py'
]
+ before_hashes = {file: calculate_file_hash(file) for file in files_to_check}
- before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+ # Perform the git pull
run_cmd("git pull --autostash", assert_success=True, environment=True)
- after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+ # Check hashes after pulling
+ after_hashes = {file: calculate_file_hash(file) for file in files_to_check}
if os.path.exists(requirements_file):
with open(requirements_file, 'r') as f:
after_pull_whl_lines = [line for line in f if '.whl' in line]
- # Check for differences in installation file hashes
- for file_name in files_to_check:
- if before_pull_hashes[file_name] != after_pull_hashes[file_name]:
- print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.")
-
- # Check if wheels changed during this pull
- wheels_changed = before_pull_whl_lines != after_pull_whl_lines
- if wheels_changed:
+ # Check for changes
+ for file in files_to_check:
+ if before_hashes[file] != after_hashes[file]:
+ print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
+ if before_pull_whl_lines != after_pull_whl_lines:
open('.wheels_changed_flag', 'w').close()
-
exit(1)
- wheels_changed = wheels_changed_from_flag
- if pull:
wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
@@ -419,16 +406,16 @@ def update_requirements(initial_installation=False, pull=True):
textgen_requirements = open(requirements_file).read().splitlines()
if not initial_installation and not wheels_changed:
- textgen_requirements = [line for line in textgen_requirements if not '.whl' in line]
+ textgen_requirements = [line for line in textgen_requirements if '.whl' not in line]
- if is_cuda118:
+ if "+cu118" in torver:
textgen_requirements = [
req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
for req in textgen_requirements
if "autoawq" not in req.lower()
]
- if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
+ if is_windows() and "+cu118" in torver: # No flash-attention on Windows for CUDA 11
textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]
with open('temp_requirements.txt', 'w') as file:
From 41f4fee085a08ab67d2e9d6afe6b570a43fb8e32 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 10:01:52 -0800
Subject: [PATCH 07/29] Lint
---
one_click.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/one_click.py b/one_click.py
index b0a77b72..54e29501 100644
--- a/one_click.py
+++ b/one_click.py
@@ -388,6 +388,7 @@ def update_requirements(initial_installation=False, pull=True):
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
if before_pull_whl_lines != after_pull_whl_lines:
open('.wheels_changed_flag', 'w').close()
+
exit(1)
wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
From 079ace63ec1462bd8402a23ce29c67323c9a5e9f Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 10:14:05 -0800
Subject: [PATCH 08/29] Installer: minor change
---
start_windows.bat | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/start_windows.bat b/start_windows.bat
index 960cfdb7..2e42d6fa 100755
--- a/start_windows.bat
+++ b/start_windows.bat
@@ -41,12 +41,12 @@ if "%conda_exists%" == "F" (
mkdir "%INSTALL_DIR%"
call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )
- :: Try CertUtil first
+ @rem Try CertUtil first
for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do (
set "output=%%a"
)
- :: If CertUtil fails, try PowerShell
+ @rem If CertUtil fails, try PowerShell
if not defined output (
for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do (
set "output=%%a"
From 39799adc4739c769e057ce253d31dbd08b0695c6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 21 Jan 2025 11:49:44 -0800
Subject: [PATCH 09/29] Add a helpful error message when llama.cpp fails to
load the model
---
modules/llamacpp_hf.py | 14 +++++++++++++-
modules/llamacpp_model.py | 14 +++++++++++++-
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index f9964fe8..b3761e0f 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
Llama = llama_cpp_lib().Llama
- model = Llama(**params)
+ try:
+ model = Llama(**params)
+ except Exception as e:
+ error_message = (
+ f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+ f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+ f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+ "\n"
+ f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+ )
+
+ raise type(e)(error_message) from e
+
model.last_updated_index = -1
return LlamacppHF(model, model_file)
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index c79755e4..db25c66c 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -108,7 +108,19 @@ class LlamaCppModel:
params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
- result.model = Llama(**params)
+ try:
+ result.model = Llama(**params)
+ except Exception as e:
+ error_message = (
+ f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+ f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+ f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+ "\n"
+ f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+ )
+
+ raise type(e)(error_message) from e
+
if cache_capacity > 0:
result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
From 4bd260c60d0c3b947b74021a9608c49bd18e5c09 Mon Sep 17 00:00:00 2001
From: FP HAM
Date: Wed, 22 Jan 2025 10:01:44 -0500
Subject: [PATCH 10/29] Give SillyTavern a bit of leeway in the way they do OpenAI
(#6685)
---
extensions/openai/completions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 2cefc22b..0f1f26a8 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -205,7 +205,7 @@ def convert_history(history):
else:
chat_dialogue.append(['', current_reply])
elif role == "system":
- system_message = content
+ system_message += f"\n{content}" if system_message else content
if not user_input_last:
user_input = ""
From b76b7f6bf5dd2c5d14bf9efaaefb1585c2d48a58 Mon Sep 17 00:00:00 2001
From: Shay Molcho <152275799+shaymolcho@users.noreply.github.com>
Date: Wed, 22 Jan 2025 17:02:43 +0200
Subject: [PATCH 11/29] Minor README change (#6687)
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 4b22f8d5..407fdff0 100644
--- a/README.md
+++ b/README.md
@@ -380,7 +380,7 @@ text-generation-webui
│ │ └── tokenizer.model
```
-In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with
+In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with:
```
python download-model.py organization/model
From 7f8c1c1f073f5460633d01e2d201a4ad78fa329d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 22 Jan 2025 08:45:17 -0800
Subject: [PATCH 12/29] Docs: update the API examples
---
docs/12 - OpenAI API.md | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md
index 9b4f89bf..daabb43e 100644
--- a/docs/12 - OpenAI API.md
+++ b/docs/12 - OpenAI API.md
@@ -14,7 +14,7 @@ Add `--api` to your command-line flags.
* To create a public Cloudflare URL, add the `--public-api` flag.
* To listen on your local network, add the `--listen` flag.
* To change the port, which is 5000 by default, use `--api-port 1234` (change 1234 to your desired port number).
-* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. Note that it doesn't work with `--public-api`.
+* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. ⚠️ **Note**: this doesn't work with `--public-api` since Cloudflare already uses HTTPS by default.
* To use an API key for authentication, add `--api-key yourkey`.
### Examples
@@ -51,8 +51,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \
"content": "Hello!"
}
],
- "mode": "instruct",
- "instruction_template": "Alpaca"
+ "mode": "instruct"
}'
```
@@ -86,7 +85,6 @@ curl http://127.0.0.1:5000/v1/chat/completions \
}
],
"mode": "instruct",
- "instruction_template": "Alpaca",
"stream": true
}'
```
@@ -131,9 +129,6 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
"args": {
"load_in_4bit": true,
"n_gpu_layers": 12
- },
- "settings": {
- "instruction_template": "Alpaca"
}
}'
```
@@ -241,6 +236,27 @@ for event in client.events():
print()
```
+#### Python example with API key
+
+Replace
+
+```python
+headers = {
+ "Content-Type": "application/json"
+}
+```
+
+with
+
+```python
+headers = {
+ "Content-Type": "application/json",
+ "Authorization": "Bearer yourPassword123"
+}
+```
+
+in any of the examples above.
+
### Environment variables
The following environment variables can be used (they take precedence over everything else):
From 0485ff20e8b7abbd4fba24a5993eed3c11dfa946 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 23 Jan 2025 06:21:40 -0800
Subject: [PATCH 13/29] Workaround for convert_to_markdown bug
---
modules/html_generator.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index c836f663..3edbef5e 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -154,6 +154,8 @@ def add_long_list_class(html):
@functools.lru_cache(maxsize=None)
def convert_to_markdown(string):
+ if not string:
+ return ""
# Make \[ \] LaTeX equations inline
pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
From 5d6f3e6f923b59c8314c72c4d2b8c4c34eccdc5d Mon Sep 17 00:00:00 2001
From: FP HAM
Date: Fri, 24 Jan 2025 09:23:44 -0500
Subject: [PATCH 14/29] Training PRO: removed monkeypatch references (#6695)
---
extensions/Training_PRO/script.py | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/extensions/Training_PRO/script.py b/extensions/Training_PRO/script.py
index 01bcf67d..f553e482 100644
--- a/extensions/Training_PRO/script.py
+++ b/extensions/Training_PRO/script.py
@@ -557,12 +557,6 @@ def calc_trainable_parameters(model):
def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str, precize_slicing_overlap: bool, add_eos_token_type: str, save_steps_under_loss: float, add_bos_token: bool, training_projection: str,sliding_window:bool,warmup_ratio:float, grad_accumulation: int,neft_noise_alpha:float):
- if shared.args.monkey_patch:
- from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
- replace_peft_model_with_int4_lora_model
- )
- replace_peft_model_with_int4_lora_model()
-
global train_log_graph
global WANT_INTERRUPT
WANT_INTERRUPT = False
@@ -600,10 +594,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
time.sleep(5)
- if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch:
- yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`", zero_pd
- return
-
if cutoff_len <= 0 or micro_batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:
yield "Cannot input zeroes.", zero_pd
return
@@ -865,15 +855,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
yield traceback.format_exc().replace('\n', '\n\n'), zero_pd
return
- if shared.args.monkey_patch:
- from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear
- from alpaca_lora_4bit.models import Linear4bitLt
- for _, m in lora_model.named_modules():
- if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
- if m.is_v1_model:
- m.zeros = m.zeros.half()
- m.scales = m.scales.half()
-
class Tracked():
def __init__(self):
self.current_steps = 0
From 71a551a62247228729b86c912891fe80d8bf4e84 Mon Sep 17 00:00:00 2001
From: FP HAM
Date: Fri, 24 Jan 2025 09:37:20 -0500
Subject: [PATCH 15/29] Add strftime_now to JINJA to satisfy LLAMA 3.1 and 3.2
(and granite) (#6692)
---
modules/chat.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/modules/chat.py b/modules/chat.py
index 694c137b..60ded0b0 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -30,9 +30,13 @@ from modules.text_generation import (
)
from modules.utils import delete_file, get_available_characters, save_file
-# Copied from the Transformers library
+def strftime_now(format):
+ return datetime.now().strftime(format)
+
jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
+jinja_env.globals["strftime_now"] = strftime_now
+
def str_presenter(dumper, data):
"""
From 3d4f3e423c28694d35fdc431e37028c4a201de38 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 25 Jan 2025 07:28:31 -0800
Subject: [PATCH 16/29] Downloader: Make progress bars not jump around
Adapted from: https://gist.github.com/NiklasBeierl/13096bfdd8b2084da8c1163dd06f91d3
---
download-model.py | 149 +++++++++++++++++++++++++++++-----------------
1 file changed, 95 insertions(+), 54 deletions(-)
diff --git a/download-model.py b/download-model.py
index 8fe94371..8ff1d69c 100644
--- a/download-model.py
+++ b/download-model.py
@@ -14,6 +14,7 @@ import json
import os
import re
import sys
+from multiprocessing import Array
from pathlib import Path
from time import sleep
@@ -27,9 +28,10 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co"
class ModelDownloader:
- def __init__(self, max_retries=5):
+ def __init__(self, max_retries=7):
self.max_retries = max_retries
self.session = self.get_session()
+ self._progress_bar_slots = None
def get_session(self):
session = requests.Session()
@@ -186,73 +188,112 @@ class ModelDownloader:
output_folder = Path(base_folder) / output_folder
return output_folder
+ @property
+ def progress_bar_slots(self):
+ if self._progress_bar_slots is None:
+ raise RuntimeError("Progress bar slots not initialized. Start download threads first.")
+
+ return self._progress_bar_slots
+
+ def initialize_progress_bar_slots(self, num_threads):
+ self._progress_bar_slots = Array("B", [0] * num_threads)
+
+ def get_progress_bar_position(self):
+ with self.progress_bar_slots.get_lock():
+ for i in range(len(self.progress_bar_slots)):
+ if self.progress_bar_slots[i] == 0:
+ self.progress_bar_slots[i] = 1
+ return i
+
+ return 0 # fallback
+
+ def release_progress_bar_position(self, slot):
+ with self.progress_bar_slots.get_lock():
+ self.progress_bar_slots[slot] = 0
+
def get_single_file(self, url, output_folder, start_from_scratch=False):
filename = Path(url.rsplit('/', 1)[1])
output_path = output_folder / filename
+ progress_bar_position = self.get_progress_bar_position()
- max_retries = 7
+ max_retries = self.max_retries
attempt = 0
- while attempt < max_retries:
- attempt += 1
- session = self.session
- headers = {}
- mode = 'wb'
+ try:
+ while attempt < max_retries:
+ attempt += 1
+ session = self.session
+ headers = {}
+ mode = 'wb'
- try:
- if output_path.exists() and not start_from_scratch:
- # Resume download
- r = session.get(url, stream=True, timeout=20)
- total_size = int(r.headers.get('content-length', 0))
- if output_path.stat().st_size >= total_size:
- return
+ try:
+ if output_path.exists() and not start_from_scratch:
+ # Resume download
+ r = session.get(url, stream=True, timeout=20)
+ total_size = int(r.headers.get('content-length', 0))
+ if output_path.stat().st_size >= total_size:
+ return
- headers = {'Range': f'bytes={output_path.stat().st_size}-'}
- mode = 'ab'
+ headers = {'Range': f'bytes={output_path.stat().st_size}-'}
+ mode = 'ab'
- with session.get(url, stream=True, headers=headers, timeout=30) as r:
- r.raise_for_status() # If status is not 2xx, raise an error
- total_size = int(r.headers.get('content-length', 0))
- block_size = 1024 * 1024 # 1MB
+ with session.get(url, stream=True, headers=headers, timeout=30) as r:
+ r.raise_for_status() # If status is not 2xx, raise an error
+ total_size = int(r.headers.get('content-length', 0))
+ block_size = 1024 * 1024 # 1MB
- filename_str = str(filename) # Convert PosixPath to string if necessary
+ filename_str = str(filename) # Convert PosixPath to string if necessary
- tqdm_kwargs = {
- 'total': total_size,
- 'unit': 'B',
- 'unit_scale': True,
- 'unit_divisor': 1024,
- 'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
- 'desc': f"{filename_str}: "
- }
+ tqdm_kwargs = {
+ 'total': total_size,
+ 'unit': 'B',
+ 'unit_scale': True,
+ 'unit_divisor': 1024,
+ 'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
+ 'desc': f"{filename_str}: ",
+ 'position': progress_bar_position,
+ 'leave': False
+ }
- if 'COLAB_GPU' in os.environ:
- tqdm_kwargs.update({
- 'position': 0,
- 'leave': True
- })
+ if 'COLAB_GPU' in os.environ:
+ tqdm_kwargs.update({
+ 'position': 0,
+ 'leave': True
+ })
- with open(output_path, mode) as f:
- with tqdm.tqdm(**tqdm_kwargs) as t:
- count = 0
- for data in r.iter_content(block_size):
- f.write(data)
- t.update(len(data))
- if total_size != 0 and self.progress_bar is not None:
- count += len(data)
- self.progress_bar(float(count) / float(total_size), f"{filename_str}")
+ with open(output_path, mode) as f:
+ with tqdm.tqdm(**tqdm_kwargs) as t:
+ count = 0
+ for data in r.iter_content(block_size):
+ f.write(data)
+ t.update(len(data))
+ if total_size != 0 and self.progress_bar is not None:
+ count += len(data)
+ self.progress_bar(float(count) / float(total_size), f"{filename_str}")
- break # Exit loop if successful
- except (RequestException, ConnectionError, Timeout) as e:
- print(f"Error downloading {filename}: {e}.")
- print(f"That was attempt {attempt}/{max_retries}.", end=' ')
- if attempt < max_retries:
- print(f"Retry begins in {2 ** attempt} seconds.")
- sleep(2 ** attempt)
- else:
- print("Failed to download after the maximum number of attempts.")
+ break # Exit loop if successful
+ except (RequestException, ConnectionError, Timeout) as e:
+ print(f"Error downloading {filename}: {e}.")
+ print(f"That was attempt {attempt}/{max_retries}.", end=' ')
+ if attempt < max_retries:
+ print(f"Retry begins in {2 ** attempt} seconds.")
+ sleep(2 ** attempt)
+ else:
+ print("Failed to download after the maximum number of attempts.")
+ finally:
+ self.release_progress_bar_position(progress_bar_position)
def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
- thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
+ self.initialize_progress_bar_slots(threads)
+ tqdm.tqdm.set_lock(tqdm.tqdm.get_lock())
+ try:
+ thread_map(
+ lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch),
+ file_list,
+ max_workers=threads,
+ disable=True
+ )
+ finally:
+ print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_bar = progress_bar
@@ -318,7 +359,7 @@ if __name__ == '__main__':
parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
- parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.')
+ parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')
args = parser.parse_args()
branch = args.branch
From 75ff3f381556f80050850c908fd3479cfb6da70d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 25 Jan 2025 08:22:23 -0800
Subject: [PATCH 17/29] UI: Mention common context length values
---
modules/ui_model_menu.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index d5116938..1264a9fd 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -89,8 +89,8 @@ def create_ui():
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
- shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
- shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
+ shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
+ shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
From c49251e95d27c210408080065dc16f6b0b0cd7a7 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 25 Jan 2025 15:03:09 -0800
Subject: [PATCH 18/29] Installer: change a message
---
one_click.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/one_click.py b/one_click.py
index 54e29501..9124e833 100644
--- a/one_click.py
+++ b/one_click.py
@@ -400,7 +400,7 @@ def update_requirements(initial_installation=False, pull=True):
if not initial_installation:
update_pytorch()
- print_big_message(f"Installing webui requirements from file: {requirements_file}")
+ print_big_message(f"Using requirements file: {requirements_file}")
print(f"TORCH: {torver}\n")
# Prepare the requirements file
From 87de91dd65e7f720b852d9ba1b75df3d457fa4f0 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 25 Jan 2025 18:29:11 -0800
Subject: [PATCH 19/29] Docs: fix an API example
---
docs/12 - OpenAI API.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md
index daabb43e..364c6b09 100644
--- a/docs/12 - OpenAI API.md
+++ b/docs/12 - OpenAI API.md
@@ -193,7 +193,7 @@ while True:
assistant_message = ''
for event in client.events():
payload = json.loads(event.data)
- chunk = payload['choices'][0]['message']['content']
+ chunk = payload['choices'][0]['delta']['content']
assistant_message += chunk
print(chunk, end='')
From 1c9dfa871bd151bccd1ee50b6674f3b878ef4d25 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 26 Jan 2025 18:17:31 -0800
Subject: [PATCH 20/29] Revert "Installer: change a message"
This reverts commit c49251e95d27c210408080065dc16f6b0b0cd7a7.
---
one_click.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/one_click.py b/one_click.py
index 9124e833..54e29501 100644
--- a/one_click.py
+++ b/one_click.py
@@ -400,7 +400,7 @@ def update_requirements(initial_installation=False, pull=True):
if not initial_installation:
update_pytorch()
- print_big_message(f"Using requirements file: {requirements_file}")
+ print_big_message(f"Installing webui requirements from file: {requirements_file}")
print(f"TORCH: {torver}\n")
# Prepare the requirements file
From 053911b6294dae5547e385ef22e69fbd4ad3b57b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 27 Jan 2025 09:07:39 -0800
Subject: [PATCH 21/29] Installer: don't ignore .whl requirements if the commit
has changed
This can happen when the user manually switches branches or calls git pull.
---
one_click.py | 45 ++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 40 insertions(+), 5 deletions(-)
diff --git a/one_click.py b/one_click.py
index 54e29501..e1b2be53 100644
--- a/one_click.py
+++ b/one_click.py
@@ -1,6 +1,7 @@
import argparse
import glob
import hashlib
+import json
import os
import platform
import re
@@ -148,6 +149,11 @@ def check_env():
sys.exit(1)
+def get_current_commit():
+ result = run_cmd("git rev-parse HEAD", capture_output=True, environment=True)
+ return result.stdout.decode('utf-8').strip()
+
+
def clear_cache():
run_cmd("conda clean -a -y", environment=True)
run_cmd("python -m pip cache purge", environment=True)
@@ -351,10 +357,21 @@ def update_requirements(initial_installation=False, pull=True):
else:
requirements_file = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
- # Check and clear the wheels changed flag
- wheels_changed = os.path.exists('.wheels_changed_flag')
- if wheels_changed:
- os.remove('.wheels_changed_flag')
+ # Load state from JSON file
+ state_file = '.installer_state.json'
+ wheels_changed = False
+ if os.path.exists(state_file):
+ with open(state_file, 'r') as f:
+ last_state = json.load(f)
+
+ wheels_changed = last_state.get('wheels_changed', False)
+ else:
+ last_state = {}
+
+ # Check wheels changed from state file and commit differences
+ current_commit = get_current_commit()
+ if last_state.get('last_commit') != current_commit:
+ wheels_changed = True
if pull:
# Read .whl lines before pulling
@@ -387,12 +404,30 @@ def update_requirements(initial_installation=False, pull=True):
if before_hashes[file] != after_hashes[file]:
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
if before_pull_whl_lines != after_pull_whl_lines:
- open('.wheels_changed_flag', 'w').close()
+ wheels_changed = True
+
+ # Save state before exiting
+ current_state = {
+ 'last_commit': current_commit,
+ 'wheels_changed': wheels_changed
+ }
+
+ with open(state_file, 'w') as f:
+ json.dump(current_state, f)
exit(1)
wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
+ # Save current state
+ current_state = {
+ 'last_commit': current_commit,
+ 'wheels_changed': wheels_changed
+ }
+
+ with open(state_file, 'w') as f:
+ json.dump(current_state, f)
+
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
From 340022d4b09a01ecd62799dcc3b0c65900896cbe Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 27 Jan 2025 10:02:21 -0800
Subject: [PATCH 22/29] Fix after previous commit
---
one_click.py | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/one_click.py b/one_click.py
index e1b2be53..4d2a4571 100644
--- a/one_click.py
+++ b/one_click.py
@@ -359,20 +359,17 @@ def update_requirements(initial_installation=False, pull=True):
# Load state from JSON file
state_file = '.installer_state.json'
+ current_commit = get_current_commit()
wheels_changed = False
if os.path.exists(state_file):
with open(state_file, 'r') as f:
last_state = json.load(f)
- wheels_changed = last_state.get('wheels_changed', False)
+ if 'wheels_changed' in last_state or last_state.get('last_commit') != current_commit:
+ wheels_changed = True
else:
last_state = {}
- # Check wheels changed from state file and commit differences
- current_commit = get_current_commit()
- if last_state.get('last_commit') != current_commit:
- wheels_changed = True
-
if pull:
# Read .whl lines before pulling
before_pull_whl_lines = []
@@ -407,10 +404,9 @@ def update_requirements(initial_installation=False, pull=True):
wheels_changed = True
# Save state before exiting
- current_state = {
- 'last_commit': current_commit,
- 'wheels_changed': wheels_changed
- }
+ current_state = {'last_commit': current_commit}
+ if wheels_changed:
+ current_state['wheels_changed'] = True
with open(state_file, 'w') as f:
json.dump(current_state, f)
@@ -420,10 +416,9 @@ def update_requirements(initial_installation=False, pull=True):
wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
# Save current state
- current_state = {
- 'last_commit': current_commit,
- 'wheels_changed': wheels_changed
- }
+ current_state = {'last_commit': current_commit}
+ if wheels_changed:
+ current_state['wheels_changed'] = True
with open(state_file, 'w') as f:
json.dump(current_state, f)
From bac652bb1d145b0151ecf8a4c0366fcfd38a1e91 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 27 Jan 2025 10:22:36 -0800
Subject: [PATCH 23/29] Another fix
---
one_click.py | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/one_click.py b/one_click.py
index 4d2a4571..d0f54582 100644
--- a/one_click.py
+++ b/one_click.py
@@ -365,7 +365,7 @@ def update_requirements(initial_installation=False, pull=True):
with open(state_file, 'r') as f:
last_state = json.load(f)
- if 'wheels_changed' in last_state or last_state.get('last_commit') != current_commit:
+ if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
wheels_changed = True
else:
last_state = {}
@@ -396,15 +396,15 @@ def update_requirements(initial_installation=False, pull=True):
with open(requirements_file, 'r') as f:
after_pull_whl_lines = [line for line in f if '.whl' in line]
- # Check for changes
+ wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
+
+ # Check for changes to installer files
for file in files_to_check:
if before_hashes[file] != after_hashes[file]:
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
- if before_pull_whl_lines != after_pull_whl_lines:
- wheels_changed = True
# Save state before exiting
- current_state = {'last_commit': current_commit}
+ current_state = {}
if wheels_changed:
current_state['wheels_changed'] = True
@@ -413,13 +413,8 @@ def update_requirements(initial_installation=False, pull=True):
exit(1)
- wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
-
# Save current state
- current_state = {'last_commit': current_commit}
- if wheels_changed:
- current_state['wheels_changed'] = True
-
+ current_state = {'last_installed_commit': current_commit}
with open(state_file, 'w') as f:
json.dump(current_state, f)
From 0b9ab1438dd079ea3ee9175d4c32d2d6f9c9073d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 27 Jan 2025 10:28:59 -0800
Subject: [PATCH 24/29] Clean up
---
one_click.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/one_click.py b/one_click.py
index d0f54582..9105e057 100644
--- a/one_click.py
+++ b/one_click.py
@@ -367,8 +367,6 @@ def update_requirements(initial_installation=False, pull=True):
if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
wheels_changed = True
- else:
- last_state = {}
if pull:
# Read .whl lines before pulling
From 39365897550683b3c1ff02e36f1942016a880cca Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 28 Jan 2025 12:53:55 -0800
Subject: [PATCH 25/29] Update README
---
.github/FUNDING.yml | 1 -
README.md | 4 ----
2 files changed, 5 deletions(-)
delete mode 100644 .github/FUNDING.yml
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
deleted file mode 100644
index e2e16212..00000000
--- a/.github/FUNDING.yml
+++ /dev/null
@@ -1 +0,0 @@
-patreon: oobabooga
diff --git a/README.md b/README.md
index 407fdff0..3642fc58 100644
--- a/README.md
+++ b/README.md
@@ -400,7 +400,3 @@ https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/ma
## Acknowledgment
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
-
-## ⭐ Featured Patreon Supporters
-
-* [Become the first one!](https://www.patreon.com/oobabooga)
From 9ddcc91a9166ae5b61f7f390a2483d461de04ab8 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 29 Jan 2025 11:20:28 -0800
Subject: [PATCH 26/29] Bump llama-cpp-python to 0.3.7
---
requirements.txt | 24 ++++++++++++------------
requirements_amd.txt | 12 ++++++------
requirements_amd_noavx2.txt | 8 ++++----
requirements_apple_intel.txt | 8 ++++----
requirements_apple_silicon.txt | 12 ++++++------
requirements_cpu_only.txt | 8 ++++----
requirements_cpu_only_noavx2.txt | 8 ++++----
requirements_noavx2.txt | 24 ++++++++++++------------
8 files changed, 52 insertions(+), 52 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index cb4a93ee..4ff7a6df 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -32,22 +32,22 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 13616a92..e30f30ee 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -31,14 +31,14 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index b1fa3957..15d25caa 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 5c62e0b7..b614acf4 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -31,8 +31,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 93ead215..ca9cc3ac 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index f0db2016..e9a97905 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 80d0f039..c4357676 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index b3a1423b..40cbc7b0 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -32,22 +32,22 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
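
[Editor's note] The bump itself is a mechanical 0.3.6 → 0.3.7 substitution across the wheel URLs; what makes the requirements files work is the PEP 508 environment marker after each `;`, which lets pip install only the one wheel matching the user's OS, architecture, and Python version. A quick illustration with the `packaging` library (a pip dependency); the marker strings are copied from the lines above.

```python
from packaging.markers import Marker

linux_wheel = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
windows_wheel = Marker('platform_system == "Windows" and python_version == "3.10"')

# Evaluate against the current interpreter and platform...
print(linux_wheel.evaluate())

# ...or against an explicit environment, to see which wheel a given machine
# would pick up. pip only installs the lines whose marker evaluates to True.
env = {'platform_system': 'Windows', 'platform_machine': 'AMD64', 'python_version': '3.10'}
print(linux_wheel.evaluate(env))    # False: this wheel is skipped on Windows
print(windows_wheel.evaluate(env))  # True: this one would be installed
```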
From b7c17727b00e3500ddf5b9263973ec57467557ef Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 29 Jan 2025 13:57:56 -0800
Subject: [PATCH 27/29] Update .gitignore
---
.gitignore | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index 7d1099b6..318e147d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,13 +26,13 @@
.DS_Store
.eslintrc.js
.idea
+.installer_state.json
.venv
venv
.envrc
.direnv
.vs
.vscode
-.wheels_changed_flag
*.bak
*.ipynb
*.log
From f01cc079b98e422948000b92ad2e0dd8b3031014 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 29 Jan 2025 14:00:59 -0800
Subject: [PATCH 28/29] Lint
---
extensions/openai/completions.py | 2 +-
modules/chat.py | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 0f1f26a8..f1a60645 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -146,7 +146,7 @@ def convert_history(history):
for item in entry['content']:
if not isinstance(item, dict):
continue
-
+
image_url = None
content = None
if item['type'] == 'image_url' and isinstance(item['image_url'], dict):
diff --git a/modules/chat.py b/modules/chat.py
index 60ded0b0..0e47da29 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -30,11 +30,12 @@ from modules.text_generation import (
)
from modules.utils import delete_file, get_available_characters, save_file
+
def strftime_now(format):
return datetime.now().strftime(format)
-jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
+jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
jinja_env.globals["strftime_now"] = strftime_now
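
[Editor's note] For context on the lines this lint pass touches: chat templates are rendered inside an immutable Jinja2 sandbox, with `strftime_now` exposed as a global so templates can print the current date. A small self-contained illustration; the template string is invented for the example and is not one shipped with the project.

```python
from datetime import datetime

from jinja2.sandbox import ImmutableSandboxedEnvironment


def strftime_now(format):
    return datetime.now().strftime(format)


jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
jinja_env.globals["strftime_now"] = strftime_now

# Hypothetical chat template using the injected global.
template = jinja_env.from_string(
    "Today is {{ strftime_now('%Y-%m-%d') }}.\n"
    "{% for message in messages %}{{ message['role'] }}: {{ message['content'] }}\n{% endfor %}"
)
print(template.render(messages=[{'role': 'user', 'content': 'Hello!'}]))
```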
From b614ea659673e6ac2f4cbf46dbffa1a747ec2d68 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 29 Jan 2025 14:05:39 -0800
Subject: [PATCH 29/29] Installer: small fixes
---
one_click.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/one_click.py b/one_click.py
index 9105e057..effc7d43 100644
--- a/one_click.py
+++ b/one_click.py
@@ -367,6 +367,8 @@ def update_requirements(initial_installation=False, pull=True):
if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
wheels_changed = True
+ else:
+ wheels_changed = True
if pull:
# Read .whl lines before pulling
@@ -409,7 +411,7 @@ def update_requirements(initial_installation=False, pull=True):
with open(state_file, 'w') as f:
json.dump(current_state, f)
- exit(1)
+ sys.exit(1)
# Save current state
current_state = {'last_installed_commit': current_commit}
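
[Editor's note] Two small fixes close out the series. The second swaps `exit(1)` for `sys.exit(1)`: the bare `exit()` builtin is injected by the `site` module for interactive use and is not guaranteed to exist in every runtime, while `sys.exit()` always is and raises SystemExit cleanly so the launcher scripts see a non-zero status. The first adds an `else` branch whose indentation is not visible in the patch; the most plausible reading, sketched below as an assumption rather than a copy of one_click.py, is that it attaches to the state-file existence check and forces a wheel reinstall when no saved state is found.

```python
import json
import os
import sys


def should_reinstall_wheels(state_file, current_commit):
    # Hedged reconstruction of the corrected decision in update_requirements().
    wheels_changed = False
    if os.path.exists(state_file):
        with open(state_file, 'r') as f:
            last_state = json.load(f)
        if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
            wheels_changed = True
    else:
        # No recorded state (fresh install or deleted file): play it safe
        # and reinstall the wheels.
        wheels_changed = True
    return wheels_changed


def abort_update(message):
    # sys.exit() is always available, unlike the site-provided exit() builtin.
    print(message, file=sys.stderr)
    sys.exit(1)
```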