mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
creating a layer with Docker/docker-compose (#633)
This commit is contained in:
parent
64bcde56ab
commit
08b9d1b23a
10
.dockerignore
Normal file
10
.dockerignore
Normal file
@ -0,0 +1,10 @@
# Keep secrets and host-side data directories out of the build context.
# The bind-mounted runtime directories (models, loras, …) are excluded so
# they do not bloat the context or get baked into image layers by `COPY . /app/`.
.env
Dockerfile
/characters
/extensions
/loras
/models
/presets
/prompts
/softprompts
/training
25
.env.example
Normal file
25
.env.example
Normal file
@ -0,0 +1,25 @@
# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
# however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5
# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
TORCH_CUDA_ARCH_LIST=7.5

# these commands worked for me with roughly 4.5GB of vram
CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices

# the following examples have been tested with the files linked in docs/README_docker.md:
# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices

# the port the webui binds to on the host
HOST_PORT=7860
# the port the webui binds to inside the container
CONTAINER_PORT=7860

# the port the api binds to on the host
HOST_API_PORT=5000
# the port the api binds to inside the container
CONTAINER_API_PORT=5000

# the version used to install text-generation-webui from
WEBUI_VERSION=HEAD
61
Dockerfile
Normal file
61
Dockerfile
Normal file
@ -0,0 +1,61 @@
# syntax=docker/dockerfile:1
# Pinned frontend: required so the `RUN --mount=type=cache` instructions below
# work on every BuildKit-enabled daemon.

# ---- Stage 1: build the GPTQ-for-LLaMa CUDA wheel -------------------------
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder

RUN apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
    rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build

WORKDIR /build

# Isolated venv so the builder's site-packages never leak into the runtime image.
RUN python3 -m venv /build/venv
RUN . /build/venv/bin/activate && \
    pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio && \
    pip3 install -r requirements.txt

# https://developer.nvidia.com/cuda-gpus
# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
# Build the CUDA extension as a wheel (dropped into /build) so the runtime
# stage can install it without needing the CUDA devel toolchain.
RUN . /build/venv/bin/activate && \
    python3 setup_cuda.py bdist_wheel -d .

# ---- Stage 2: runtime image ----------------------------------------------
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04

LABEL maintainer="Your Name <your.email@example.com>"
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"

RUN apt-get update && \
    apt-get install --no-install-recommends -y git python3 python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Cache mount keeps pip's download cache on the build host, not in a layer.
RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv

COPY . /app/

WORKDIR /app

# Pin the webui source to a specific ref; when WEBUI_VERSION is empty the
# `test -n` fails and the copied-in working tree is used as-is.
ARG WEBUI_VERSION
RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"

RUN virtualenv /app/venv
RUN . /app/venv/bin/activate && \
    pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio && \
    pip3 install -r requirements.txt

# Install the prebuilt GPTQ wheel produced by the builder stage.
COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
RUN . /app/venv/bin/activate && \
    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl

ENV CLI_ARGS=""

# Per-extension requirements; each in its own layer for cache reuse.
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt

# Workaround: bitsandbytes probes for the CPU library; point it at the CUDA
# 11.8 build so GPU kernels are used inside the container.
RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so

# Shell form is deliberate: ${CLI_ARGS} must be word-split into separate
# server.py arguments, which exec form would not do.
CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
@ -117,7 +117,7 @@ As an alternative to the recommended WSL method, you can install the web UI nati
|
|||||||
|
|
||||||
### Alternative: Docker
|
### Alternative: Docker
|
||||||
|
|
||||||
https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87
|
[docker/docker-compose instructions](docs/README_docker.md)
|
||||||
|
|
||||||
### Updating the requirements
|
### Updating the requirements
|
||||||
|
|
||||||
|
32
docker-compose.yml
Normal file
32
docker-compose.yml
Normal file
@ -0,0 +1,32 @@
version: "3.3"
services:
  text-generation-webui:
    build:
      context: .
      args:
        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
        # NOTE(review): GPTQ_VERSION is forwarded here but the Dockerfile
        # declares no matching ARG — confirm whether it is still needed.
        GPTQ_VERSION: ${GPTQ_VERSION}
        WEBUI_VERSION: ${WEBUI_VERSION}
    env_file: .env
    ports:
      - "${HOST_PORT}:${CONTAINER_PORT}"
      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
    stdin_open: true
    tty: true
    # Host directories bind-mounted over the image paths so models/settings
    # persist across container rebuilds.
    volumes:
      - ./characters:/app/characters
      - ./extensions:/app/extensions
      - ./loras:/app/loras
      - ./models:/app/models
      - ./presets:/app/presets
      - ./prompts:/app/prompts
      - ./softprompts:/app/softprompts
      - ./training:/app/training
    # Reserve GPU 0 via the nvidia container runtime.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
101
docs/README_docker.md
Normal file
101
docs/README_docker.md
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
- [Linux](#linux)
|
||||||
|
- [Ubuntu 22.04](#ubuntu-2204)
|
||||||
|
- [update the drivers](#update-the-drivers)
|
||||||
|
- [reboot](#reboot)
|
||||||
|
- [docker \& container toolkit](#docker--container-toolkit)
|
||||||
|
- [Manjaro](#manjaro)
|
||||||
|
- [update the drivers](#update-the-drivers-1)
|
||||||
|
- [reboot](#reboot-1)
|
||||||
|
- [docker \& container toolkit](#docker--container-toolkit-1)
|
||||||
|
- [prepare environment \& startup](#prepare-environment--startup)
|
||||||
|
- [place models in models folder](#place-models-in-models-folder)
|
||||||
|
- [prepare .env file](#prepare-env-file)
|
||||||
|
- [startup docker container](#startup-docker-container)
|
||||||
|
- [Windows](#windows)
|
||||||
|
# Linux
|
||||||
|
|
||||||
|
## Ubuntu 22.04
|
||||||
|
|
||||||
|
### update the drivers
|
||||||
|
in the “Software Updater”, update the drivers to the latest version of the proprietary driver.
|
||||||
|
|
||||||
|
### reboot
|
||||||
|
to switch over to the newly installed driver
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt update
|
||||||
|
sudo apt-get install curl
|
||||||
|
|
||||||
|
sudo mkdir -m 0755 -p /etc/apt/keyrings
|
||||||
|
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||||
|
|
||||||
|
echo \
|
||||||
|
"deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
|
||||||
|
"$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
|
||||||
|
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||||
|
|
||||||
|
sudo apt update
|
||||||
|
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y
|
||||||
|
|
||||||
|
sudo usermod -aG docker $USER
|
||||||
|
newgrp docker
|
||||||
|
```
|
||||||
|
|
||||||
|
### docker & container toolkit
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||||
|
|
||||||
|
echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \
|
||||||
|
sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null
|
||||||
|
|
||||||
|
sudo apt update
|
||||||
|
|
||||||
|
sudo apt install nvidia-docker2 nvidia-container-runtime -y
|
||||||
|
sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
## Manjaro
|
||||||
|
|
||||||
|
### update the drivers
|
||||||
|
```bash
|
||||||
|
sudo mhwd -a pci nonfree 0300
|
||||||
|
```
|
||||||
|
### reboot
|
||||||
|
```bash
|
||||||
|
reboot
|
||||||
|
```
|
||||||
|
### docker & container toolkit
|
||||||
|
```bash
|
||||||
|
yay -S docker docker-compose buildkit gcc nvidia-docker
|
||||||
|
sudo usermod -aG docker $USER
|
||||||
|
newgrp docker
|
||||||
|
sudo systemctl restart docker # required by nvidia-container-runtime
|
||||||
|
```
|
||||||
|
|
||||||
|
## prepare environment & startup
|
||||||
|
|
||||||
|
### place models in models folder
|
||||||
|
download and place the models inside the models folder. tested with:
|
||||||
|
|
||||||
|
4bit
|
||||||
|
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
|
||||||
|
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
|
||||||
|
|
||||||
|
8bit:
|
||||||
|
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
|
||||||
|
|
||||||
|
### prepare .env file
|
||||||
|
edit .env values to your needs
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
nano .env
|
||||||
|
```
|
||||||
|
|
||||||
|
### startup docker container
|
||||||
|
```bash
|
||||||
|
docker-compose up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Windows
|
||||||
|
coming soon
|
Loading…
Reference in New Issue
Block a user