mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-12 21:37:35 +01:00
feature/docker_improvements (#4768)
This commit is contained in:
parent
2698d7c9fd
commit
88620c6b39
7
.gitignore
vendored
7
.gitignore
vendored
@ -26,7 +26,6 @@
|
||||
.DS_Store
|
||||
.eslintrc.js
|
||||
.idea
|
||||
.env
|
||||
.venv
|
||||
venv
|
||||
.envrc
|
||||
@ -42,3 +41,9 @@ package.json
|
||||
package-lock.json
|
||||
Thumbs.db
|
||||
wandb
|
||||
|
||||
# ignore user docker config and top level links to docker files
|
||||
/docker-compose.yaml
|
||||
/docker-compose.yml
|
||||
/Dockerfile
|
||||
.env
|
||||
|
@ -163,14 +163,18 @@ The requirements*.txt above contain various precompiled wheels. If you wish to co
|
||||
### Alternative: Docker
|
||||
|
||||
```
|
||||
ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} .
|
||||
ln -s docker/{nvidia/Dockerfile,docker-compose.yml} .
|
||||
cp docker/.env.example .env
|
||||
# Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model
|
||||
# Edit .env and set:
|
||||
# TORCH_CUDA_ARCH_LIST based on your GPU model
|
||||
# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
|
||||
# BUILD_EXTENSIONS optionally add comma separated list of extensions to build
|
||||
docker compose up --build
|
||||
```
|
||||
|
||||
* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
|
||||
* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
|
||||
* Currently breaks GPTQ-for-Llama
|
||||
|
||||
### Updating the requirements
|
||||
|
||||
|
@ -2,19 +2,21 @@
|
||||
# however, for it to work for me I had to specify the exact version for my card (an RTX 2060), which is 7.5
|
||||
# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
|
||||
TORCH_CUDA_ARCH_LIST=7.5
|
||||
|
||||
# your command-line flags go here:
|
||||
CLI_ARGS=--listen
|
||||
|
||||
# the port the webui binds to on the host
|
||||
HOST_PORT=7860
|
||||
# the port the webui binds to inside the container
|
||||
CONTAINER_PORT=7860
|
||||
|
||||
# the port the api binds to on the host
|
||||
HOST_API_PORT=5000
|
||||
# the port the api binds to inside the container
|
||||
CONTAINER_API_PORT=5000
|
||||
|
||||
# the version used to install text-generation-webui from
|
||||
WEBUI_VERSION=HEAD
|
||||
# Comma separated extensions to build
|
||||
BUILD_EXTENSIONS=""
|
||||
# Set APP_RUNTIME_GID to an appropriate host system group to enable access to mounted volumes
|
||||
# You can find your current host user group id with the command `id -g`
|
||||
APP_RUNTIME_GID=6972
|
||||
# override default app build permissions (handy for deploying to cloud)
|
||||
#APP_GID=6972
|
||||
#APP_UID=6972
|
||||
|
@ -1,77 +0,0 @@
|
||||
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \
|
||||
apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN git clone --depth=1 https://github.com/oobabooga/GPTQ-for-LLaMa /build
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,rw \
|
||||
python3 -m venv /build/venv && \
|
||||
. /build/venv/bin/activate && \
|
||||
pip3 install --upgrade pip setuptools wheel ninja && \
|
||||
pip3 install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121 && \
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
# https://developer.nvidia.com/cuda-gpus
|
||||
# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
|
||||
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
|
||||
RUN . /build/venv/bin/activate && \
|
||||
python3 setup_cuda.py bdist_wheel -d .
|
||||
|
||||
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
|
||||
|
||||
LABEL maintainer="Your Name <your.email@example.com>"
|
||||
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \
|
||||
apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ ffmpeg && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,rw pip3 install virtualenv
|
||||
|
||||
RUN mkdir /app
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ARG WEBUI_VERSION
|
||||
RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
|
||||
|
||||
# Create virtualenv
|
||||
RUN virtualenv /app/venv
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,rw \
|
||||
. /app/venv/bin/activate && \
|
||||
pip3 install --upgrade pip setuptools wheel ninja && \
|
||||
pip3 install torch xformers --index-url https://download.pytorch.org/whl/cu121 && \
|
||||
pip3 install torchvision torchaudio sentence-transformers
|
||||
|
||||
# Copy and install GPTQ-for-LLaMa
|
||||
COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,rw \
|
||||
. /app/venv/bin/activate && \
|
||||
pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
|
||||
|
||||
# Install main requirements
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,rw \
|
||||
. /app/venv/bin/activate && \
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
COPY . /app/
|
||||
|
||||
RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
|
||||
|
||||
# Install extension requirements
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,rw \
|
||||
. /app/venv/bin/activate && \
|
||||
for ext in /app/extensions/*/requirements.txt; do \
|
||||
cd "$(dirname "$ext")"; \
|
||||
pip3 install -r requirements.txt; \
|
||||
done
|
||||
|
||||
ENV CLI_ARGS=""
|
||||
|
||||
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
|
||||
CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
|
@ -5,28 +5,31 @@ services:
|
||||
context: .
|
||||
args:
|
||||
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
|
||||
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
|
||||
WEBUI_VERSION: ${WEBUI_VERSION:-HEAD}
|
||||
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
|
||||
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
|
||||
APP_GID: ${APP_GID:-6972}
|
||||
APP_UID: ${APP_UID:-6972}
|
||||
env_file: .env
|
||||
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
|
||||
ports:
|
||||
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
|
||||
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
|
||||
stdin_open: true
|
||||
tty: true
|
||||
volumes:
|
||||
- ./characters:/app/characters
|
||||
- ./extensions:/app/extensions
|
||||
- ./loras:/app/loras
|
||||
- ./models:/app/models
|
||||
- ./presets:/app/presets
|
||||
- ./prompts:/app/prompts
|
||||
- ./softprompts:/app/softprompts
|
||||
- ./training:/app/training
|
||||
- ./characters:/home/app/text-generation-webui/characters
|
||||
- ./extensions:/home/app/text-generation-webui/extensions
|
||||
- ./loras:/home/app/text-generation-webui/loras
|
||||
- ./models:/home/app/text-generation-webui/models
|
||||
- ./presets:/home/app/text-generation-webui/presets
|
||||
- ./prompts:/home/app/text-generation-webui/prompts
|
||||
- ./softprompts:/home/app/text-generation-webui/softprompts
|
||||
- ./training:/home/app/text-generation-webui/training
|
||||
- ./cloudflared:/etc/cloudflared
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ['0']
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
|
56
docker/nvidia/Dockerfile
Normal file
56
docker/nvidia/Dockerfile
Normal file
@ -0,0 +1,56 @@
|
||||
# BUILDER
# Build stage: installs the toolchain on the CUDA "devel" image and builds wheels
# for requirements.txt plus the requirements of every extension listed in
# BUILD_EXTENSIONS. The resulting /home/app/build/wheels directory is what the
# runtime stage copies out of this stage.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS builder
WORKDIR /builder
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
# comma separated list of extension directory names (may be empty)
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"

# create / update build env
# apt-get is used instead of apt: apt's command line is not a stable scripting interface
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev pip && \
    rm -rf /var/lib/apt/lists/*

# upgrade the packaging tools, then make the shared builder & runtime app user
# (`--global` is not a documented `pip install` option and was dropped)
RUN --mount=type=cache,target=/root/.cache/pip,rw \
    pip3 install --upgrade pip wheel setuptools && \
    addgroup --gid "$APP_GID" app_grp && \
    useradd -m -u "$APP_UID" --gid app_grp app
USER app:app_grp

# build wheels for runtime
WORKDIR /home/app/build
COPY --chown=app:app_grp requirements.txt /home/app/build
COPY --chown=app:app_grp extensions /home/app/build/extensions
# build all requirements files as wheel dists; the sed expression turns each
# comma separated BUILD_EXTENSIONS entry into an additional
# `-r /home/app/build/extensions/<name>/requirements.txt` argument
RUN --mount=type=cache,target=/root/.cache/pip,rw \
    pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'`
# drop the setuptools .whl to avoid install issues; -f keeps the build from
# failing when no setuptools wheel was produced
RUN rm -f wheels/setuptools*.whl
|
||||
|
||||
# RUNTIME
# Final stage: CUDA "runtime" image with python, the wheels built in the builder
# stage, and the application source under /home/app/text-generation-webui.
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}"
ARG APP_UID="${APP_UID:-6972}"
ARG APP_GID="${APP_GID:-6972}"
# extra command-line flags passed to server.py by CMD
ENV CLI_ARGS=""

# create / update runtime env, then make the shared builder & runtime app user
# (apt-get instead of apt; `--global` is not a documented `pip install` option
# and was dropped; --no-cache-dir is the full spelling of --no-cache)
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt-get update && \
    apt-get install --no-install-recommends -y git python3 pip && \
    rm -rf /var/lib/apt/lists/* && \
    pip3 install --no-cache-dir --upgrade pip wheel setuptools && \
    addgroup --gid "$APP_GID" app_grp && \
    useradd -m -u "$APP_UID" --gid app_grp app
USER app:app_grp

# install locally built wheels for app
WORKDIR /home/app/wheels
# --chown so the app user owns the wheels it deletes in the RUN below
COPY --from=builder --chown=app:app_grp /home/app/build/wheels /home/app/wheels
COPY --chown=app:app_grp . /home/app/text-generation-webui
# umask 0002 so files created by this step are group writable
RUN umask 0002 && \
    chmod g+rwX /home/app/text-generation-webui && \
    pip3 install --no-build-isolation --no-cache-dir --no-index ./*.whl && \
    rm -r /home/app/wheels

WORKDIR /home/app/text-generation-webui
# NOTE(review): none of these variables is declared as ARG/ENV in this stage,
# so the defaults after ":-" look like the values that get exposed - confirm
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
# set umask to ensure group read / write at runtime
# shell form is kept for the umask and ${CLI_ARGS} expansion; `exec` makes
# python replace the shell so it receives container stop signals directly
CMD umask 0002 && export HOME=/home/app && exec python3 server.py ${CLI_ARGS}
|
Loading…
x
Reference in New Issue
Block a user