Merge pull request #5404 from oobabooga/dev

Merge dev branch
oobabooga 2024-01-30 14:17:08 -03:00 committed by GitHub
commit 4f3fdf1b5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 336 additions and 235 deletions

View File

@ -65,12 +65,9 @@
" torver = torch.__version__\n",
" print(f\"TORCH: {torver}\")\n",
" is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n",
" is_cuda117 = '+cu117' in torver # 2.0.1+cu117\n",
"\n",
" textgen_requirements = open('requirements.txt').read().splitlines()\n",
" if is_cuda117:\n",
" textgen_requirements = [req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0') for req in textgen_requirements]\n",
" elif is_cuda118:\n",
" if is_cuda118:\n",
" textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
" with open('temp_requirements.txt', 'w') as file:\n",
" file.write('\\n'.join(textgen_requirements))\n",
@ -130,4 +127,4 @@
"outputs": []
}
]
}
}

View File

@ -160,12 +160,23 @@ The `requirements*.txt` above contain various wheels precompiled through GitHub
### Alternative: Docker
```
ln -s docker/{nvidia/Dockerfile,docker-compose.yml,.dockerignore} .
For NVIDIA GPU:
ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
For AMD GPU:
ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
For Intel GPU:
ln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .
For CPU only:
ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .
cp docker/.env.example .env
# Create logs/cache dir:
mkdir -p logs cache
# Edit .env and set:
# TORCH_CUDA_ARCH_LIST based on your GPU model
# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
# BUILD_EXTENSIONS optionally add a comma-separated list of extensions to build
# Edit CMD_FLAGS.txt and add in it the options you want to execute (like --listen --cpu)
#
docker compose up --build
```

View File

@ -20,3 +20,6 @@ APP_RUNTIME_GID=6972
# override default app build permissions (handy for deploying to cloud)
#APP_GID=6972
#APP_UID=6972
# Set cache env
TRANSFORMERS_CACHE=/home/app/text-generation-webui/cache/
HF_HOME=/home/app/text-generation-webui/cache/
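Both variables point the Hugging Face caches at the bind-mounted `./cache` directory, so downloaded models survive container rebuilds. A minimal sketch of the effect, assuming the `huggingface_hub` package (not part of this diff):

```python
# Sketch only: HF_HOME decides where huggingface_hub resolves its cache,
# so pointing it at the mounted ./cache volume keeps downloads on the host.
import os

os.environ["HF_HOME"] = "/home/app/text-generation-webui/cache/"

from huggingface_hub import constants  # paths are resolved from HF_HOME at import

print(constants.HF_HUB_CACHE)  # .../text-generation-webui/cache/hub
```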

docker/amd/Dockerfile Normal file
View File

@ -0,0 +1,21 @@
# BUILDER
FROM ubuntu:22.04
WORKDIR /builder
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
rm -rf /var/lib/apt/lists/*
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui
RUN GPU_CHOICE=B USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
WORKDIR /home/app/text-generation-webui
# set umask to ensure group read / write at runtime
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

View File

@ -0,0 +1,57 @@
version: "3.3"
services:
text-generation-webui:
build:
context: .
args:
# Requirements file to use:
# | GPU | CPU | requirements file to use |
# |--------|---------|---------|
# | NVIDIA | has AVX2 | `requirements.txt` |
# | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
# | AMD | has AVX2 | `requirements_amd.txt` |
# | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
# | CPU only | has AVX2 | `requirements_cpu_only.txt` |
# | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
# | Apple | Intel | `requirements_apple_intel.txt` |
# | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
# Default: requirements.txt`
# BUILD_REQUIREMENTS: requirements.txt
# Extension requirements to build:
# BUILD_EXTENSIONS:
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
APP_GID: ${APP_GID:-6972}
APP_UID: ${APP_UID-6972}
env_file: .env
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
ports:
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
stdin_open: true
group_add:
- video
tty: true
ipc: host
devices:
- /dev/kfd
- /dev/dri
cap_add:
- SYS_PTRACE
security_opt:
- seccomp=unconfined
volumes:
- ./cache:/home/app/text-generation-webui/cache
- ./characters:/home/app/text-generation-webui/characters
- ./extensions:/home/app/text-generation-webui/extensions
- ./loras:/home/app/text-generation-webui/loras
- ./logs:/home/app/text-generation-webui/logs
- ./models:/home/app/text-generation-webui/models
- ./presets:/home/app/text-generation-webui/presets
- ./prompts:/home/app/text-generation-webui/prompts
- ./softprompts:/home/app/text-generation-webui/softprompts
- ./training:/home/app/text-generation-webui/training
- ./cloudflared:/etc/cloudflared

docker/cpu/Dockerfile Normal file
View File

@ -0,0 +1,25 @@
# BUILDER
FROM ubuntu:22.04
WORKDIR /builder
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
ARG GPU_CHOICE=A
ARG USE_CUDA118=FALSE
ARG LAUNCH_AFTER_INSTALL=FALSE
ARG INSTALL_EXTENSIONS=TRUE
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
rm -rf /var/lib/apt/lists/*
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui
RUN GPU_CHOICE=N USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
# set umask to ensure group read / write at runtime
WORKDIR /home/app/text-generation-webui
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

View File

@ -0,0 +1,47 @@
version: "3.3"
services:
text-generation-webui:
build:
context: .
args:
# Requirements file to use:
# | GPU | CPU | requirements file to use |
# |--------|---------|---------|
# | NVIDIA | has AVX2 | `requirements.txt` |
# | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
# | AMD | has AVX2 | `requirements_amd.txt` |
# | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
# | CPU only | has AVX2 | `requirements_cpu_only.txt` |
# | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
# | Apple | Intel | `requirements_apple_intel.txt` |
# | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
# Default: requirements.txt`
# BUILD_REQUIREMENTS: requirements.txt
# Extension requirements to build:
# BUILD_EXTENSIONS:
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
APP_GID: ${APP_GID:-6972}
APP_UID: ${APP_UID-6972}
env_file: .env
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
ports:
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
stdin_open: true
tty: true
volumes:
- ./cache:/home/app/text-generation-webui/cache
- ./characters:/home/app/text-generation-webui/characters
- ./extensions:/home/app/text-generation-webui/extensions
- ./loras:/home/app/text-generation-webui/loras
- ./logs:/home/app/text-generation-webui/logs
- ./models:/home/app/text-generation-webui/models
- ./presets:/home/app/text-generation-webui/presets
- ./prompts:/home/app/text-generation-webui/prompts
- ./softprompts:/home/app/text-generation-webui/softprompts
- ./training:/home/app/text-generation-webui/training
- ./cloudflared:/etc/cloudflared

docker/intel/Dockerfile Normal file
View File

@ -0,0 +1,21 @@
# BUILDER
FROM ubuntu:22.04
WORKDIR /builder
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
rm -rf /var/lib/apt/lists/*
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui
RUN GPU_CHOICE=D USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
# set umask to ensure group read / write at runtime
WORKDIR /home/app/text-generation-webui
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

View File

@ -0,0 +1,55 @@
version: "3.3"
services:
text-generation-webui:
build:
context: .
args:
# Requirements file to use:
# | GPU | CPU | requirements file to use |
# |--------|---------|---------|
# | NVIDIA | has AVX2 | `requirements.txt` |
# | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
# | AMD | has AVX2 | `requirements_amd.txt` |
# | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
# | CPU only | has AVX2 | `requirements_cpu_only.txt` |
# | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
# | Apple | Intel | `requirements_apple_intel.txt` |
# | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
# Default: requirements.txt`
# BUILD_REQUIREMENTS: requirements.txt
# Extension requirements to build:
# BUILD_EXTENSIONS:
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
APP_GID: ${APP_GID:-6972}
APP_UID: ${APP_UID-6972}
env_file: .env
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
ports:
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
stdin_open: true
group_add:
- video
tty: true
ipc: host
devices:
- /dev/kfd
- /dev/dri
cap_add:
- SYS_PTRACE
security_opt:
- seccomp=unconfined
volumes:
- ./characters:/home/app/text-generation-webui/characters
- ./extensions:/home/app/text-generation-webui/extensions
- ./loras:/home/app/text-generation-webui/loras
- ./models:/home/app/text-generation-webui/models
- ./presets:/home/app/text-generation-webui/presets
- ./prompts:/home/app/text-generation-webui/prompts
- ./softprompts:/home/app/text-generation-webui/softprompts
- ./training:/home/app/text-generation-webui/training
- ./cloudflared:/etc/cloudflared

View File

@ -1,57 +1,21 @@
# BUILDER
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder
FROM ubuntu:22.04
WORKDIR /builder
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG BUILD_REQUIREMENTS="${BUILD_REQUIREMENTS:-requirements.txt}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
# create / update build env
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git vim build-essential python3-dev pip && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/root/.cache/pip,rw \
pip3 install --global --upgrade pip wheel setuptools && \
# make shared builder & runtime app user
addgroup --gid $APP_GID app_grp && \
useradd -m -u $APP_UID --gid app_grp app
USER app:app_grp
# build wheels for runtime
WORKDIR /home/app/build
COPY --chown=app:app_grp "$BUILD_REQUIREMENTS" /home/app/build/requirements.txt
COPY --chown=app:app_grp extensions /home/app/build/extensions
RUN --mount=type=cache,target=/root/.cache/pip,rw \
# build all requirements files as wheel dists
pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'`
# drop wheel and setuptools .whl to avoid install issues
RUN rm wheels/setuptools*.whl
# RUNTIME
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
ENV CLI_ARGS=""
# create / update runtime env
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
apt update && \
apt install --no-install-recommends -y git python3 pip && \
rm -rf /var/lib/apt/lists/* && \
pip3 install --global --no-cache --upgrade pip wheel setuptools && \
# make shared builder & runtime app user
addgroup --gid $APP_GID app_grp && \
useradd -m -u $APP_UID --gid app_grp app
USER app:app_grp
# install locally built wheels for app
WORKDIR /home/app/wheels
COPY --from=builder /home/app/build/wheels /home/app/wheels
COPY --chown=app:app_grp . /home/app/text-generation-webui
RUN umask 0002 && \
chmod g+rwX /home/app/text-generation-webui && \
pip3 install --global --no-build-isolation --no-cache --no-index ./*.whl && \
rm -r /home/app/wheels
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
rm -rf /var/lib/apt/lists/*
WORKDIR /home/app/
RUN git clone https://github.com/oobabooga/text-generation-webui.git
WORKDIR /home/app/text-generation-webui
RUN GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
WORKDIR /home/app/text-generation-webui
# set umask to ensure group read / write at runtime
CMD umask 0002 && export HOME=/home/app && python3 server.py ${CLI_ARGS}
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

View File

@ -34,9 +34,11 @@ services:
    stdin_open: true
    tty: true
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts

View File

@ -30,10 +30,18 @@ class ModelDownloader:
        if max_retries:
            self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
            self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
        if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
            self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
        if os.getenv('HF_TOKEN') is not None:
            self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
        try:
            from huggingface_hub import get_token
            token = get_token()
        except ImportError:
            token = os.getenv("HF_TOKEN")
        if token is not None:
            self.session.headers = {'authorization': f'Bearer {token}'}

    def sanitize_model_and_branch_names(self, model, branch):
        if model[-1] == '/':
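The change above replaces `HF_USER`/`HF_PASS` basic auth with bearer-token auth: it prefers a token cached by `huggingface_hub` (e.g. after `huggingface-cli login`) and falls back to the `HF_TOKEN` environment variable only when that package is unavailable. A minimal sketch of the same resolution order, assuming a recent `huggingface_hub` that exports `get_token`:

```python
# Sketch of the token resolution above: cached login first, HF_TOKEN env
# var only if huggingface_hub cannot be imported.
import os

def resolve_token():
    try:
        from huggingface_hub import get_token  # reads the locally cached token
        return get_token()
    except ImportError:
        return os.getenv("HF_TOKEN")

token = resolve_token()
headers = {'authorization': f'Bearer {token}'} if token else {}
```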

View File

@ -6,6 +6,7 @@ import time
from pathlib import Path
import gradio as gr
import torch
from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer
@ -23,7 +24,7 @@ params = {
"voice": "female_01.wav",
"language": "English",
"model_name": "tts_models/multilingual/multi-dataset/xtts_v2",
"device": "cuda"
"device": "cuda" if torch.cuda.is_available() else "cpu"
}
this_dir = str(Path(__file__).parent.resolve())

View File

@ -110,22 +110,6 @@ def process_parameters(body, is_legacy=False):
    logits_processor = []
    logit_bias = body.get('logit_bias', None)
    if logit_bias:  # {str: float, ...}
        # XXX convert tokens from tiktoken based on requested model
        # Ex.: 'logit_bias': {'1129': 100, '11442': 100, '16243': 100}
        try:
            encoder = tiktoken.encoding_for_model(generate_params['model'])
            new_logit_bias = {}
            for logit, bias in logit_bias.items():
                for x in encode(encoder.decode([int(logit)]), add_special_tokens=False)[0]:
                    if int(x) in [0, 1, 2, 29871]:  # XXX LLAMA tokens
                        continue
                    new_logit_bias[str(int(x))] = bias
            debug_msg('logit_bias_map', logit_bias, '->', new_logit_bias)
            logit_bias = new_logit_bias
        except KeyError:
            pass  # assume native tokens if we can't find the tokenizer
        logits_processor = [LogitsBiasProcessor(logit_bias)]

    logprobs = None  # coming to chat eventually
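With the tiktoken remapping gone, `logit_bias` keys are handed directly to `LogitsBiasProcessor`, so they are assumed to be token IDs in the loaded model's own tokenizer rather than tiktoken IDs for the OpenAI model named in the request. A hypothetical request body under that assumption:

```python
# Hypothetical payload: the keys are native token IDs of the loaded model,
# not tiktoken IDs for the OpenAI model named in the request.
body = {
    "prompt": "Once upon a time",
    "logit_bias": {"1129": 100.0, "11442": -100.0},  # token ID -> bias
}
```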

grammars/roleplay.gbnf Normal file
View File

@ -0,0 +1,4 @@
phrasePattern ::= "*" [A-ZÀ-Ü] ( [A-Za-z'.?!,:;-à-üÀ-Ü0-9]+ " " )* [A-Za-z'.?!,:;-à-üÀ-Ü0-9]+ "*"
quotedText ::= "\"" [A-ZÀ-Ü] [A-Za-z'.*?!,:;-à-üÀ-Ü0-9]* (" " [A-Za-z'.*?!,:;-à-üÀ-Ü0-9]+)* "\""
root ::= phrasePattern singleSpace quotedText singleSpace phrasePattern singleSpace quotedText singleSpace (phrasePattern singleSpace quotedText singleSpace)?
singleSpace ::= " "

View File

@ -115,6 +115,6 @@ def get_truncation_length():
def load_grammar(name):
    p = Path(f'grammars/{name}')
    if p.exists():
        return open(p, 'r').read()
        return open(p, 'r', encoding='utf-8').read()
    else:
        return ''
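The explicit `encoding='utf-8'` matters because the new roleplay grammar contains non-ASCII ranges (`À-Ü`, `à-ü`); without it, `open()` uses the platform's preferred encoding, which on Windows is often cp1252 and silently mis-decodes the UTF-8 bytes. A small illustration, assuming the grammar file above is on disk:

```python
# Sketch: an explicit codec is deterministic; the locale default is not.
from pathlib import Path

raw = Path('grammars/roleplay.gbnf').read_bytes()  # 'À' is b'\xc3\x80' in UTF-8
good = raw.decode('utf-8')       # what the fixed code reads
mojibake = raw.decode('cp1252')  # 'À' comes out as 'Ã€' under a cp1252 locale
assert 'À' in good and 'Ã€' in mojibake
```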

View File

@ -149,17 +149,17 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False,
    if environment:
        if is_windows():
            conda_bat_path = os.path.join(script_dir, "installer_files", "conda", "condabin", "conda.bat")
            cmd = "\"" + conda_bat_path + "\" activate \"" + conda_env_path + "\" >nul && " + cmd
            cmd = f'"{conda_bat_path}" activate "{conda_env_path}" >nul && {cmd}'
        else:
            conda_sh_path = os.path.join(script_dir, "installer_files", "conda", "etc", "profile.d", "conda.sh")
            cmd = ". \"" + conda_sh_path + "\" && conda activate \"" + conda_env_path + "\" && " + cmd
            cmd = f'. "{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}'

    # Run shell commands
    result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env)

    # Assert the command ran successfully
    if assert_success and result.returncode != 0:
        print("Command '" + cmd + "' failed with exit status code '" + str(result.returncode) + "'.\n\nExiting now.\nTry running the start/update script again.")
        print(f"Command '{cmd}' failed with exit status code '{str(result.returncode)}'.\n\nExiting now.\nTry running the start/update script again.")
        sys.exit(1)

    return result

@ -257,7 +257,7 @@ def install_webui():
def update_requirements(initial_installation=False):
    # Create .git directory if missing
    if not os.path.isdir(os.path.join(script_dir, ".git")):
    if not os.path.exists(os.path.join(script_dir, ".git")):
        git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main'
        run_cmd(git_creation_cmd, environment=True, assert_success=True)

@ -291,7 +291,7 @@ def update_requirements(initial_installation=False):
        for i, extension in enumerate(extensions):
            print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n")
            extension_req_path = os.path.join("extensions", extension, "requirements.txt")
            run_cmd("python -m pip install -r " + extension_req_path + " --upgrade", assert_success=False, environment=True)
            run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True)
    elif initial_installation:
        print_big_message("Will not install extensions due to INSTALL_EXTENSIONS environment variable.")

@ -299,7 +299,6 @@ def update_requirements(initial_installation=False):
    torver = torch_version()
    is_cuda = '+cu' in torver
    is_cuda118 = '+cu118' in torver  # 2.1.0+cu118
    is_cuda117 = '+cu117' in torver  # 2.0.1+cu117
    is_rocm = '+rocm' in torver  # 2.0.1+rocm5.4.2
    is_intel = '+cxx11' in torver  # 2.0.1a0+cxx11.abi
    is_cpu = '+cpu' in torver  # 2.0.1+cpu

@ -320,11 +319,9 @@ def update_requirements(initial_installation=False):
    # Prepare the requirements file
    textgen_requirements = open(requirements_file).read().splitlines()
    if is_cuda117:
        textgen_requirements = [req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0') for req in textgen_requirements]
    elif is_cuda118:
    if is_cuda118:
        textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]
    if is_windows() and (is_cuda117 or is_cuda118):  # No flash-attention on Windows for CUDA 11
    if is_windows() and is_cuda118:  # No flash-attention on Windows for CUDA 11
        textgen_requirements = [req for req in textgen_requirements if 'jllllll/flash-attention' not in req]

    with open('temp_requirements.txt', 'w') as file:

@ -335,13 +332,13 @@ def update_requirements(initial_installation=False):
    for req in git_requirements:
        url = req.replace("git+", "")
        package_name = url.split("/")[-1].split("@")[0].rstrip(".git")
        run_cmd("python -m pip uninstall -y " + package_name, environment=True)
        run_cmd(f"python -m pip uninstall -y {package_name}", environment=True)
        print(f"Uninstalled {package_name}")

    # Make sure that API requirements are installed (temporary)
    extension_req_path = os.path.join("extensions", "openai", "requirements.txt")
    if os.path.exists(extension_req_path):
        run_cmd("python -m pip install -r " + extension_req_path + " --upgrade", environment=True)
        run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", environment=True)

    # Install/update the project requirements
    run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True)

View File

@ -29,67 +29,39 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu121-py3-none-any.whl
autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"

View File

@ -29,27 +29,17 @@ bitsandbytes==0.38.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/llama-cpp-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.31+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.31+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.31+rocm5.6.1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.31+rocm5.6.1-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.36+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.36+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"

View File

@ -29,23 +29,15 @@ bitsandbytes==0.38.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"

View File

@ -29,15 +29,9 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# Mac wheels
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"

View File

@ -29,19 +29,11 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# Mac wheels
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp39-cp39-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/metal/llama_cpp_python-0.2.31-cp38-cp38-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.36-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
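
pip picks exactly one of the Metal wheels above by evaluating the PEP 508 environment marker after each semicolon: `platform_release` is the Darwin kernel version (20.x is macOS 11, 21.x is macOS 12, 22.x is macOS 13, 23.x is macOS 14). A minimal sketch of the same check, assuming the third-party `packaging` library is installed (pip performs this evaluation internally):

```
# Illustrative only: evaluate one of the markers above against this machine.
# Requires `pip install packaging`.
import platform
from packaging.markers import Marker

# Marker copied from one of the macOS 14 / Python 3.11 entries above.
marker = Marker(
    'platform_system == "Darwin" and platform_release >= "23.0.0" '
    'and platform_release < "24.0.0" and python_version == "3.11"'
)

print("platform_system :", platform.system())
print("platform_release:", platform.release())  # Darwin 23.x == macOS 14
print("marker satisfied:", marker.evaluate())   # True only on a matching system
```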

View File

@ -29,11 +29,7 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx2-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
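
The `+cpuavx2` local version label marks builds compiled with AVX2; the `+cpuavx` variants in the next file are for CPUs without it, where an AVX2 wheel would typically crash with an illegal-instruction error at load time. A rough Linux-only sketch for checking the host before choosing (the `/proc` parsing is illustrative, not part of the project; on other platforms a library such as `py-cpuinfo` is the usual route):

```
# Sketch: report whether this CPU advertises AVX2 before picking a wheel.
def has_avx2(cpuinfo_path="/proc/cpuinfo"):
    try:
        with open(cpuinfo_path) as f:
            for line in f:
                if line.startswith("flags"):
                    return "avx2" in line.split()
    except OSError:
        pass  # no /proc (macOS, Windows), or unreadable
    return False

print("AVX2 available:", has_avx2())
```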

View File

@ -29,11 +29,7 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View File

@ -29,67 +29,39 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/cpu/llama_cpp_python-0.2.31+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.36+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.31+cu121avx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.36+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/oobabooga/llama-cpp-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.31+cu121avx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.36+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
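
The `_tensorcores` wheels above target NVIDIA GPUs that have tensor cores, which as a rule of thumb means compute capability 7.0 or higher (Volta and newer). A hedged sketch, assuming a CUDA-enabled PyTorch is already installed, for checking which variant fits a given card:

```
# Sketch: query the first visible GPU's compute capability with PyTorch.
# Cards at 7.0+ (Volta, Turing, Ampere, Ada, ...) carry tensor cores.
import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(f"Compute capability: {major}.{minor}")
    print("Tensor cores:", "yes" if (major, minor) >= (7, 0) else "no")
else:
    print("No CUDA device visible; the CPU-only wheels apply instead.")
```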
# CUDA wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX/ctransformers-0.2.27+cu121-py3-none-any.whl
autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"
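
The local version label embedded in each pin (`+cu121`, `+cpuavx2`, and so on) is preserved in the installed package metadata, so it is easy to confirm after the fact which build variant actually landed. A small standard-library sketch (the distribution names listed are examples taken from the pins above):

```
# Sketch: print the installed version, including the +cu121-style local
# label, for a few of the distributions pinned above.
from importlib.metadata import PackageNotFoundError, version

for dist in ("llama_cpp_python", "exllamav2", "autoawq"):
    try:
        print(f"{dist}: {version(dist)}")
    except PackageNotFoundError:
        print(f"{dist}: not installed")
```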