diff --git a/.devops/llama-cuda.Dockerfile b/.devops/llama-cuda.Dockerfile index c2a3914ed..d5ce538f6 100644 --- a/.devops/llama-cuda.Dockerfile +++ b/.devops/llama-cuda.Dockerfile @@ -23,7 +23,7 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} # Enable CUDA ENV LLAMA_CUDA=1 -RUN make -j$(nproc) llama +RUN make -j$(nproc) llama-cli FROM ${BASE_CUDA_RUN_CONTAINER} as runtime diff --git a/.devops/llama-rocm.Dockerfile b/.devops/llama-rocm.Dockerfile index 86b11b07f..7e8a6f0fa 100644 --- a/.devops/llama-rocm.Dockerfile +++ b/.devops/llama-rocm.Dockerfile @@ -40,6 +40,6 @@ ENV LLAMA_HIPBLAS=1 ENV CC=/opt/rocm/llvm/bin/clang ENV CXX=/opt/rocm/llvm/bin/clang++ -RUN make -j$(nproc) llama +RUN make -j$(nproc) llama-cli ENTRYPOINT [ "/app/llama-cli" ] diff --git a/.devops/llama.Dockerfile b/.devops/llama.Dockerfile index 21fdf2a66..38382bfc9 100644 --- a/.devops/llama.Dockerfile +++ b/.devops/llama.Dockerfile @@ -9,7 +9,7 @@ WORKDIR /app COPY . . -RUN make -j$(nproc) llama +RUN make -j$(nproc) llama-cli FROM ubuntu:$UBUNTU_VERSION as runtime diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 98e5244cb..e737ebeb3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -240,7 +240,7 @@ jobs: echo "Fetch llama2c model" wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin ./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf - ./bin/llama -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - name: Determine tag name id: tag