mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
creating a layer with Docker/docker-compose (#633)
This commit is contained in:
parent
64bcde56ab
commit
08b9d1b23a
10
.dockerignore
Normal file
10
.dockerignore
Normal file
@ -0,0 +1,10 @@
# Keep secrets and host-side data directories out of the build context.
# The bind-mounted runtime directories (models, loras, …) are excluded so
# they do not bloat the context or get baked into image layers by `COPY . /app/`.
.env
Dockerfile
/characters
/extensions
/loras
/models
/presets
/prompts
/softprompts
/training
25
.env.example
Normal file
25
.env.example
Normal file
@ -0,0 +1,25 @@
# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
# however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5
# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
TORCH_CUDA_ARCH_LIST=7.5

# these commands worked for me with roughly 4.5GB of vram
CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices

# the following examples have been tested with the files linked in docs/README_docker.md:
# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices

# the port the webui binds to on the host
HOST_PORT=7860
# the port the webui binds to inside the container
CONTAINER_PORT=7860

# the port the api binds to on the host
HOST_API_PORT=5000
# the port the api binds to inside the container
CONTAINER_API_PORT=5000

# the version used to install text-generation-webui from
WEBUI_VERSION=HEAD
61
Dockerfile
Normal file
61
Dockerfile
Normal file
@ -0,0 +1,61 @@
# syntax=docker/dockerfile:1
# Pinned frontend: required so the `RUN --mount=type=cache` instructions below
# work on every BuildKit-enabled daemon.

# ---- Stage 1: build the GPTQ-for-LLaMa CUDA wheel -------------------------
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder

RUN apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
    rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build

WORKDIR /build

# Isolated venv so the builder's site-packages never leak into the runtime image.
RUN python3 -m venv /build/venv
RUN . /build/venv/bin/activate && \
    pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio && \
    pip3 install -r requirements.txt

# https://developer.nvidia.com/cuda-gpus
# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
# Build the CUDA extension as a wheel (dropped into /build) so the runtime
# stage can install it without needing the CUDA devel toolchain.
RUN . /build/venv/bin/activate && \
    python3 setup_cuda.py bdist_wheel -d .

# ---- Stage 2: runtime image ----------------------------------------------
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04

LABEL maintainer="Your Name <your.email@example.com>"
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"

RUN apt-get update && \
    apt-get install --no-install-recommends -y git python3 python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Cache mount keeps pip's download cache on the build host, not in a layer.
RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv

COPY . /app/

WORKDIR /app

# Pin the webui source to a specific ref; when WEBUI_VERSION is empty the
# `test -n` fails and the copied-in working tree is used as-is.
ARG WEBUI_VERSION
RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"

RUN virtualenv /app/venv
RUN . /app/venv/bin/activate && \
    pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio && \
    pip3 install -r requirements.txt

# Install the prebuilt GPTQ wheel produced by the builder stage.
COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
RUN . /app/venv/bin/activate && \
    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl

ENV CLI_ARGS=""

# Per-extension requirements; each in its own layer for cache reuse.
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt

# Workaround: bitsandbytes probes for the CPU library; point it at the CUDA
# 11.8 build so GPU kernels are used inside the container.
RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so

# Shell form is deliberate: ${CLI_ARGS} must be word-split into separate
# server.py arguments, which exec form would not do.
CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
@ -117,7 +117,7 @@ As an alternative to the recommended WSL method, you can install the web UI nati
|
|||||||
|
|
||||||
### Alternative: Docker
|
### Alternative: Docker
|
||||||
|
|
||||||
https://github.com/oobabooga/text-generation-webui/issues/174, https://github.com/oobabooga/text-generation-webui/issues/87
|
[docker/docker-compose instructions](docs/README_docker.md)
|
||||||
|
|
||||||
### Updating the requirements
|
### Updating the requirements
|
||||||
|
|
||||||
|
32
docker-compose.yml
Normal file
32
docker-compose.yml
Normal file
@ -0,0 +1,32 @@
version: "3.3"
services:
  text-generation-webui:
    build:
      context: .
      args:
        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
        # NOTE(review): GPTQ_VERSION is forwarded here but the Dockerfile
        # declares no matching ARG — confirm whether it is still needed.
        GPTQ_VERSION: ${GPTQ_VERSION}
        WEBUI_VERSION: ${WEBUI_VERSION}
    env_file: .env
    ports:
      - "${HOST_PORT}:${CONTAINER_PORT}"
      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
    stdin_open: true
    tty: true
    # Host directories bind-mounted over the image paths so models/settings
    # persist across container rebuilds.
    volumes:
      - ./characters:/app/characters
      - ./extensions:/app/extensions
      - ./loras:/app/loras
      - ./models:/app/models
      - ./presets:/app/presets
      - ./prompts:/app/prompts
      - ./softprompts:/app/softprompts
      - ./training:/app/training
    # Reserve GPU 0 via the nvidia container runtime.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
101
docs/README_docker.md
Normal file
101
docs/README_docker.md
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
- [Linux](#linux)
|
||||||
|
- [Ubuntu 22.04](#ubuntu-2204)
|
||||||
|
- [update the drivers](#update-the-drivers)
|
||||||
|
- [reboot](#reboot)
|
||||||
|
- [docker \& container toolkit](#docker--container-toolkit)
|
||||||
|
- [Manjaro](#manjaro)
|
||||||
|
- [update the drivers](#update-the-drivers-1)
|
||||||
|
- [reboot](#reboot-1)
|
||||||
|
- [docker \& container toolkit](#docker--container-toolkit-1)
|
||||||
|
- [prepare environment \& startup](#prepare-environment--startup)
|
||||||
|
- [place models in models folder](#place-models-in-models-folder)
|
||||||
|
- [prepare .env file](#prepare-env-file)
|
||||||
|
- [startup docker container](#startup-docker-container)
|
||||||
|
- [Windows](#windows)
|
||||||
|
# Linux
|
||||||
|
|
||||||
|
## Ubuntu 22.04
|
||||||
|
|
||||||
|
### update the drivers
|
||||||
|
in the “Software Updater”, update the drivers to the latest version of the proprietary driver.
|
||||||
|
|
||||||
|
### reboot
|
||||||
|
to switch over to the newly installed driver
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt update
|
||||||
|
sudo apt-get install curl
|
||||||
|
|
||||||
|
sudo mkdir -m 0755 -p /etc/apt/keyrings
|
||||||
|
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||||
|
|
||||||
|
echo \
|
||||||
|
"deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
|
||||||
|
"$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
|
||||||
|
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||||
|
|
||||||
|
sudo apt update
|
||||||
|
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y
|
||||||
|
|
||||||
|
sudo usermod -aG docker $USER
|
||||||
|
newgrp docker
|
||||||
|
```
|
||||||
|
|
||||||
|
### docker & container toolkit
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||||
|
|
||||||
|
echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \
|
||||||
|
sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null
|
||||||
|
|
||||||
|
sudo apt update
|
||||||
|
|
||||||
|
sudo apt install nvidia-docker2 nvidia-container-runtime -y
|
||||||
|
sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
## Manjaro
|
||||||
|
|
||||||
|
### update the drivers
|
||||||
|
```bash
|
||||||
|
sudo mhwd -a pci nonfree 0300
|
||||||
|
```
|
||||||
|
### reboot
|
||||||
|
```bash
|
||||||
|
reboot
|
||||||
|
```
|
||||||
|
### docker & container toolkit
|
||||||
|
```bash
|
||||||
|
yay -S docker docker-compose buildkit gcc nvidia-docker
|
||||||
|
sudo usermod -aG docker $USER
|
||||||
|
newgrp docker
|
||||||
|
sudo systemctl restart docker # required by nvidia-container-runtime
|
||||||
|
```
|
||||||
|
|
||||||
|
## prepare environment & startup
|
||||||
|
|
||||||
|
### place models in models folder
|
||||||
|
download and place the models inside the models folder. tested with:
|
||||||
|
|
||||||
|
4bit
|
||||||
|
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
|
||||||
|
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
|
||||||
|
|
||||||
|
8bit:
|
||||||
|
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
|
||||||
|
|
||||||
|
### prepare .env file
|
||||||
|
edit .env values to your needs
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
nano .env
|
||||||
|
```
|
||||||
|
|
||||||
|
### startup docker container
|
||||||
|
```bash
|
||||||
|
docker-compose up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# Windows
|
||||||
|
coming soon
|
Loading…
Reference in New Issue
Block a user