From 08b9d1b23a0e47a977fb73a64261054fa6e00e93 Mon Sep 17 00:00:00 2001
From: loeken <loeken@internetz.me>
Date: Fri, 7 Apr 2023 03:46:04 +0200
Subject: [PATCH] creating a layer with Docker/docker-compose (#633)

---
 .dockerignore         |  10 +++++
 .env.example          |  25 +++++++++++
 Dockerfile            |  61 +++++++++++++++++++++++++
 README.md             |   2 +-
 docker-compose.yml    |  32 +++++++++++++
 docs/README_docker.md | 101 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 230 insertions(+), 1 deletion(-)
 create mode 100644 .dockerignore
 create mode 100644 .env.example
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml
 create mode 100644 docs/README_docker.md

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..fdf0c4ce
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+.env
+Dockerfile
+/characters
+/extensions
+/loras
+/models
+/presets
+/prompts
+/softprompts
+/training
diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..d20300b7
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,25 @@
+# by default the Dockerfile builds for these CUDA compute capabilities: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
+# however, to get it working I had to specify the exact value for my card (RTX 2060), which is 7.5
+# you can find the value for your card here: https://developer.nvidia.com/cuda-gpus
+TORCH_CUDA_ARCH_LIST=7.5
+
+# these arguments worked for me with roughly 4.5GB of VRAM
+CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
+
+# the following examples have been tested with the files linked in docs/README_docker.md:
+# example running 13b with 4bit/128 groupsize        : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
+# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
+# example running 7b with --load-in-8bit             : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
+
+# the port the webui binds to on the host
+HOST_PORT=7860
+# the port the webui binds to inside the container
+CONTAINER_PORT=7860
+
+# the port the api binds to on the host
+HOST_API_PORT=5000
+# the port the api binds to inside the container
+CONTAINER_API_PORT=5000
+
+# the git ref (commit, tag or branch) the image build resets the text-generation-webui checkout to
+WEBUI_VERSION=HEAD
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..8a063539
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,61 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder
+# Build stage: compiles the GPTQ-for-LLaMa CUDA extension into a wheel.
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git build-essential python3-dev python3-venv && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build
+
+WORKDIR /build
+# The venv lives outside /build so that copying /build into a later stage does not carry the venv (and torch) along.
+RUN python3 -m venv /opt/venv
+RUN . /opt/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
+# ARG values are visible to the RUN below as environment variables; override with --build-arg.
+# https://developer.nvidia.com/cuda-gpus
+# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
+ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
+RUN . /opt/venv/bin/activate && \
+    python3 setup_cuda.py bdist_wheel -d .
+
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+# Runtime stage: the web UI plus the GPTQ-for-LLaMa wheel built in the builder stage.
+LABEL maintainer="Your Name <your.email@example.com>"
+LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y git python3 python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
+
+COPY . /app/
+
+WORKDIR /app
+# With WEBUI_VERSION set, hard-reset the copied checkout to that ref; otherwise (or if the reset fails) keep the copied source.
+ARG WEBUI_VERSION
+RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
+
+RUN virtualenv /app/venv
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && \
+    pip3 install --upgrade pip setuptools && \
+    pip3 install torch torchvision torchaudio && \
+    pip3 install -r requirements.txt
+
+COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && \
+    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
+
+ENV CLI_ARGS=""
+# NOTE(review): .dockerignore excludes /extensions, so these directories presumably only exist when the git reset above restores them - confirm.
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt
+# Overwrites the bitsandbytes CPU library with the CUDA 11.8 build (python3.10 path is hard-coded). NOTE(review): presumably a loader workaround - confirm.
+RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
+# Shell form so ${CLI_ARGS} is expanded and word-split; exec makes python3 replace the shell so it receives stop signals directly.
+CMD . /app/venv/bin/activate && exec python3 server.py ${CLI_ARGS}
diff --git a/README.md b/README.md
index c4dd01df..904f6547 100644
--- a/README.md
+++ b/README.md
@@ -117,7 +117,7 @@ As an alternative to the recommended WSL method, you can install the web UI nati
 
 ### Alternative: Docker
 
-https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87
+[docker/docker-compose instructions](docs/README_docker.md)
 
 ### Updating the requirements
 
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..509caee2
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,32 @@
+version: "3.3"
+services:
+  text-generation-webui:
+    build:
+      context: .
+      args:
+        # CUDA compute capability of your card, see https://developer.nvidia.com/cuda-gpus
+        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
+        # git ref the image build resets the webui checkout to
+        WEBUI_VERSION: ${WEBUI_VERSION}
+    env_file: .env  # also supplies CLI_ARGS to the container at runtime
+    ports:
+      - "${HOST_PORT}:${CONTAINER_PORT}"
+      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
+    stdin_open: true
+    tty: true
+    volumes:
+      - ./characters:/app/characters
+      - ./extensions:/app/extensions
+      - ./loras:/app/loras
+      - ./models:/app/models
+      - ./presets:/app/presets
+      - ./prompts:/app/prompts
+      - ./softprompts:/app/softprompts
+      - ./training:/app/training
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
diff --git a/docs/README_docker.md b/docs/README_docker.md
new file mode 100644
index 00000000..bdd00748
--- /dev/null
+++ b/docs/README_docker.md
@@ -0,0 +1,101 @@
+- [Linux](#linux)
+  - [Ubuntu 22.04](#ubuntu-2204)
+    - [update the drivers](#update-the-drivers)
+    - [reboot](#reboot)
+    - [docker \& container toolkit](#docker--container-toolkit)
+  - [Manjaro](#manjaro)
+    - [update the drivers](#update-the-drivers-1)
+    - [reboot](#reboot-1)
+    - [docker \& container toolkit](#docker--container-toolkit-1)
+  - [prepare environment \& startup](#prepare-environment--startup)
+    - [place models in models folder](#place-models-in-models-folder)
+    - [prepare .env file](#prepare-env-file)
+    - [startup docker container](#startup-docker-container)
+- [Windows](#windows)
+# Linux
+
+## Ubuntu 22.04
+
+### update the drivers
+in the “software updater”, update the drivers to the latest version of the proprietary driver.
+
+### reboot
+reboot to switch to the new driver
+
+```bash
+sudo apt update
+sudo apt-get install curl
+
+sudo mkdir -m 0755 -p /etc/apt/keyrings
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
+
+echo \
+  "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
+  "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
+  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+
+sudo apt update
+sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y
+
+sudo usermod -aG docker $USER
+newgrp docker
+```
+
+### docker & container toolkit
+```bash
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+
+echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \
+sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null 
+
+sudo apt update
+
+sudo apt install nvidia-docker2 nvidia-container-runtime -y
+sudo systemctl restart docker
+```
+
+## Manjaro
+
+### update the drivers
+```bash
+sudo mhwd -a pci nonfree 0300
+```
+### reboot
+```bash
+reboot
+```
+### docker & container toolkit
+```bash
+yay -S docker docker-compose buildkit gcc nvidia-docker
+sudo usermod -aG docker $USER
+newgrp docker
+sudo systemctl restart docker # required by nvidia-container-runtime
+```
+
+## prepare environment & startup
+
+### place models in models folder
+download and place the models inside the models folder. tested with:
+
+4bit:
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
+
+8bit:
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
+
+### prepare .env file
+copy .env.example to .env, then edit the values in .env to your needs
+```bash
+cp .env.example .env
+nano .env
+```
+
+### startup docker container
+```bash
+docker-compose up --build
+```
+
+
+# Windows
+coming soon
\ No newline at end of file