From 08b9d1b23a0e47a977fb73a64261054fa6e00e93 Mon Sep 17 00:00:00 2001 From: loeken Date: Fri, 7 Apr 2023 03:46:04 +0200 Subject: [PATCH] creating a layer with Docker/docker-compose (#633) --- .dockerignore | 10 +++++ .env.example | 25 +++++++++++ Dockerfile | 61 +++++++++++++++++++++++++ README.md | 2 +- docker-compose.yml | 32 +++++++++++++ docs/README_docker.md | 101 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 .dockerignore create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 docs/README_docker.md diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..fdf0c4ce --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.env +Dockerfile +/characters +/extensions +/loras +/models +/presets +/prompts +/softprompts +/training diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..d20300b7 --- /dev/null +++ b/.env.example @@ -0,0 +1,25 @@ +# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX +# however, to get it working I had to specify the exact version for my card (RTX 2060), which is 7.5 +# you can find the version for your card here: https://developer.nvidia.com/cuda-gpus +TORCH_CUDA_ARCH_LIST=7.5 + +# these commands worked for me with roughly 4.5GB of vram +CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices + +# the following examples have been tested with the files linked in docs/README_docker.md: +# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25 +# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share +# example running 7b in 8bit: CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices + +# the port the webui binds to on the host
+HOST_PORT=7860
+# the port the webui binds to inside the container
+CONTAINER_PORT=7860
+
+# the port the api binds to on the host
+HOST_API_PORT=5000
+# the port the api binds to inside the container
+CONTAINER_API_PORT=5000
+
+# the version used to install text-generation-webui from
+WEBUI_VERSION=HEAD
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..8a063539
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,72 @@
+# Stage 1 (builder): build the GPTQ-for-LLaMa CUDA extension as a wheel.
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build
+
+WORKDIR /build
+
+# Keep the build venv outside /build: the whole /build tree is copied into the
+# runtime image further down, so a venv stored there would ship a second torch.
+RUN python3 -m venv /opt/venv
+RUN --mount=type=cache,target=/root/.cache/pip . /opt/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
+
+# https://developer.nvidia.com/cuda-gpus
+# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
+ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+RUN . /opt/venv/bin/activate && \
+    python3 setup_cuda.py bdist_wheel -d .
+
+# Stage 2 (runtime): the web UI plus the wheel built above.
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+
+LABEL maintainer="Your Name "
+LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git python3 python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
+
+COPY . /app/
+
+WORKDIR /app
+
+# NOTE(review): /extensions is listed in .dockerignore, so the extension
+# requirements files used below presumably only exist once this reset restores
+# the tracked files - confirm before changing or removing this step.
+ARG WEBUI_VERSION
+RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
+
+RUN virtualenv /app/venv
+# The pip cache mount keeps downloaded wheels out of the image layer.
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
+
+COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
+RUN . /app/venv/bin/activate && \
+    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
+
+ENV CLI_ARGS=""
+
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt
+
+# Replace the CPU build of the bitsandbytes library with the CUDA 11.8 build.
+RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
+
+# Shell form is needed to source the venv and word-split CLI_ARGS; exec makes
+# python replace the shell so it receives the stop signal from docker directly.
+CMD . /app/venv/bin/activate && exec python3 server.py ${CLI_ARGS}
diff --git a/README.md b/README.md
index c4dd01df..904f6547 100644
--- a/README.md
+++ b/README.md
@@ -117,7 +117,7 @@ As an alternative to the recommended WSL method, you can install the web UI nati
 
 ### Alternative: Docker
 
-https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87
+[docker/docker-compose instructions](docs/README_docker.md)
 
 ### Updating the requirements
 
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..509caee2
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,32 @@
+version: "3.3"
+services:
+  text-generation-webui:
+    build:
+      context: .
+ args: + # specify the CUDA compute capability of your card: https://developer.nvidia.com/cuda-gpus + TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST} + GPTQ_VERSION: ${GPTQ_VERSION} + WEBUI_VERSION: ${WEBUI_VERSION} + env_file: .env + ports: + - "${HOST_PORT}:${CONTAINER_PORT}" + - "${HOST_API_PORT}:${CONTAINER_API_PORT}" + stdin_open: true + tty: true + volumes: + - ./characters:/app/characters + - ./extensions:/app/extensions + - ./loras:/app/loras + - ./models:/app/models + - ./presets:/app/presets + - ./prompts:/app/prompts + - ./softprompts:/app/softprompts + - ./training:/app/training + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] diff --git a/docs/README_docker.md b/docs/README_docker.md new file mode 100644 index 00000000..bdd00748 --- /dev/null +++ b/docs/README_docker.md @@ -0,0 +1,101 @@ +- [Linux](#linux) + - [Ubuntu 22.04](#ubuntu-2204) + - [update the drivers](#update-the-drivers) + - [reboot](#reboot) + - [docker \& container toolkit](#docker--container-toolkit) + - [Manjaro](#manjaro) + - [update the drivers](#update-the-drivers-1) + - [reboot](#reboot-1) + - [docker \& container toolkit](#docker--container-toolkit-1) + - [prepare environment \& startup](#prepare-environment--startup) + - [place models in models folder](#place-models-in-models-folder) + - [prepare .env file](#prepare-env-file) + - [startup docker container](#startup-docker-container) +- [Windows](#windows) +# Linux + +## Ubuntu 22.04 + +### update the drivers +in the “software updater”, update the drivers to the latest version of the proprietary driver.
+ +### reboot +to switch to the new driver + +```bash +sudo apt update +sudo apt-get install curl + +sudo mkdir -m 0755 -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + +echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt update +sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y + +sudo usermod -aG docker $USER +newgrp docker +``` + +### docker & container toolkit +```bash +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg + +echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \ +sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null + +sudo apt update + +sudo apt install nvidia-docker2 nvidia-container-runtime -y +sudo systemctl restart docker +``` + +## Manjaro + +### update the drivers +```bash +sudo mhwd -a pci nonfree 0300 +``` +### reboot +```bash +reboot +``` +### docker & container toolkit +```bash +yay -S docker docker-compose buildkit gcc nvidia-docker +sudo usermod -aG docker $USER +newgrp docker +sudo systemctl restart docker # required by nvidia-container-runtime +``` + +## prepare environment & startup + +### place models in models folder +download and place the models inside the models folder.
tested with: + +4bit: +https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617 +https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105 + +8bit: +https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789 + +### prepare .env file +edit the .env values to suit your needs +```bash +cp .env.example .env +nano .env +``` + +### startup docker container +```bash +docker-compose up --build +``` + + +# Windows +coming soon \ No newline at end of file