diff --git a/.devops/main-cuda.Dockerfile b/.devops/llama-cuda.Dockerfile
similarity index 100%
rename from .devops/main-cuda.Dockerfile
rename to .devops/llama-cuda.Dockerfile
diff --git a/.devops/main-intel.Dockerfile b/.devops/llama-intel.Dockerfile
similarity index 100%
rename from .devops/main-intel.Dockerfile
rename to .devops/llama-intel.Dockerfile
diff --git a/.devops/main-rocm.Dockerfile b/.devops/llama-rocm.Dockerfile
similarity index 100%
rename from .devops/main-rocm.Dockerfile
rename to .devops/llama-rocm.Dockerfile
diff --git a/.devops/server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile
similarity index 100%
rename from .devops/server-cuda.Dockerfile
rename to .devops/llama-server-cuda.Dockerfile
diff --git a/.devops/server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile
similarity index 100%
rename from .devops/server-intel.Dockerfile
rename to .devops/llama-server-intel.Dockerfile
diff --git a/.devops/server-rocm.Dockerfile b/.devops/llama-server-rocm.Dockerfile
similarity index 100%
rename from .devops/server-rocm.Dockerfile
rename to .devops/llama-server-rocm.Dockerfile
diff --git a/.devops/server-vulkan.Dockerfile b/.devops/llama-server-vulkan.Dockerfile
similarity index 100%
rename from .devops/server-vulkan.Dockerfile
rename to .devops/llama-server-vulkan.Dockerfile
diff --git a/.devops/server.Dockerfile b/.devops/llama-server.Dockerfile
similarity index 100%
rename from .devops/server.Dockerfile
rename to .devops/llama-server.Dockerfile
diff --git a/.devops/main-vulkan.Dockerfile b/.devops/llama-vulkan.Dockerfile
similarity index 100%
rename from .devops/main-vulkan.Dockerfile
rename to .devops/llama-vulkan.Dockerfile
diff --git a/.devops/main.Dockerfile b/.devops/llama.Dockerfile
similarity index 100%
rename from .devops/main.Dockerfile
rename to .devops/llama.Dockerfile
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 9b03d19bc..276854b71 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -30,20 +30,20 @@ jobs:
     strategy:
       matrix:
         config:
-          - { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          - { tag: "light", dockerfile: ".devops/llama.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-          - { tag: "server", dockerfile: ".devops/server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
           #                     have disabled them for now until the reason why
           #                     is understood.
- - { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" } + - { tag: "light-cuda", dockerfile: ".devops/llama-cuda.Dockerfile", platforms: "linux/amd64" } - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-cuda", dockerfile: ".devops/server-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" } + - { tag: "light-rocm", dockerfile: ".devops/llama-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "server-rocm", dockerfile: ".devops/server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "light-intel", dockerfile: ".devops/main-intel.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" } + - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "light-intel", dockerfile: ".devops/llama-intel.Dockerfile", platforms: "linux/amd64" } + - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" } steps: - name: Check out the repo uses: actions/checkout@v4 diff --git a/README-sycl.md b/README-sycl.md index c7d45f3ad..ad750b797 100644 --- a/README-sycl.md +++ b/README-sycl.md @@ -99,14 +99,14 @@ The docker build option is currently limited to *intel GPU* targets. ### Build image ```sh # Using FP16 -docker build -t llama-cpp-sycl --build-arg="LLAMA_SYCL_F16=ON" -f .devops/main-intel.Dockerfile . +docker build -t llama-cpp-sycl --build-arg="LLAMA_SYCL_F16=ON" -f .devops/llama-intel.Dockerfile . ``` *Notes*: To build in default FP32 *(Slower than FP16 alternative)*, you can remove the `--build-arg="LLAMA_SYCL_F16=ON"` argument from the previous command. -You can also use the `.devops/server-intel.Dockerfile`, which builds the *"server"* alternative. +You can also use the `.devops/llama-server-intel.Dockerfile`, which builds the *"server"* alternative. ### Run container diff --git a/README.md b/README.md index d4b895df3..1f077cd15 100644 --- a/README.md +++ b/README.md @@ -556,7 +556,7 @@ Building the program with BLAS support may lead to some performance improvements ```sh # Build the image - docker build -t llama-cpp-vulkan -f .devops/main-vulkan.Dockerfile . + docker build -t llama-cpp-vulkan -f .devops/llama-vulkan.Dockerfile . # Then, use it: docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-vulkan -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 @@ -936,8 +936,8 @@ Assuming one has the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia ```bash docker build -t local/llama.cpp:full-cuda -f .devops/full-cuda.Dockerfile . -docker build -t local/llama.cpp:light-cuda -f .devops/main-cuda.Dockerfile . -docker build -t local/llama.cpp:server-cuda -f .devops/server-cuda.Dockerfile . +docker build -t local/llama.cpp:light-cuda -f .devops/llama-cuda.Dockerfile . +docker build -t local/llama.cpp:server-cuda -f .devops/llama-server-cuda.Dockerfile . 
 ```
 
 You may want to pass in some different `ARGS`, depending on the CUDA environment supported by your container host, as well as the GPU architecture.
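
For reference, a minimal sketch of overriding those build `ARGS` against the renamed Dockerfile (not part of the patch above). It assumes the CUDA Dockerfile still exposes the `CUDA_VERSION` and `CUDA_DOCKER_ARCH` build arguments that the pre-rename `main-cuda.Dockerfile` declared; the version and architecture values are placeholders for your host's CUDA setup.

```sh
# Sketch: build the light CUDA image with overridden build args.
# CUDA_VERSION / CUDA_DOCKER_ARCH are assumed from the pre-rename Dockerfile;
# substitute values matching your container host and GPU.
docker build -t local/llama.cpp:light-cuda \
  --build-arg CUDA_VERSION=12.3.1 \
  --build-arg CUDA_DOCKER_ARCH=sm_86 \
  -f .devops/llama-cuda.Dockerfile .
```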