diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile
index 572e5d8ea..274b91b71 100644
--- a/.devops/main-intel.Dockerfile
+++ b/.devops/main-intel.Dockerfile
@@ -10,14 +10,12 @@ WORKDIR /app

 COPY . .

-RUN mkdir build && \
-    cd build && \
-    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
         echo "LLAMA_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
-    cmake --build . --config Release --target main
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build build --config Release --target main

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

diff --git a/.devops/main-vulkan.Dockerfile b/.devops/main-vulkan.Dockerfile
index bca460365..6c2b2ed5b 100644
--- a/.devops/main-vulkan.Dockerfile
+++ b/.devops/main-vulkan.Dockerfile
@@ -14,10 +14,8 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
-    cd build && \
-    cmake .. -DLLAMA_VULKAN=1 && \
-    cmake --build . --config Release --target main
+RUN cmake -B build -DLLAMA_VULKAN=1 && \
+    cmake --build build --config Release --target main

 # Clean up
 WORKDIR /

diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile
index 304487335..a8e451fa9 100644
--- a/.devops/server-intel.Dockerfile
+++ b/.devops/server-intel.Dockerfile
@@ -10,14 +10,12 @@ WORKDIR /app

 COPY . .

-RUN mkdir build && \
-    cd build && \
-    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
         echo "LLAMA_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
-    cmake --build . --config Release --target server
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake --build build --config Release --target server

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile
index 7e5a5283b..6e757e171 100644
--- a/.devops/server-vulkan.Dockerfile
+++ b/.devops/server-vulkan.Dockerfile
@@ -18,10 +18,8 @@ RUN apt-get update && \
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
-    cd build && \
-    cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
-    cmake --build . --config Release --target server
+RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+    cmake --build build --config Release --target server

 # Clean up
 WORKDIR /

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 6dc91d27b..3e968d179 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -96,9 +96,7 @@ jobs:
         id: cmake_build
         run: |
           set -eux
-          mkdir build
-          cd build
-          cmake .. \
+          cmake -B build \
               -DLLAMA_NATIVE=OFF \
               -DLLAMA_BUILD_SERVER=ON \
               -DLLAMA_CURL=ON \
@@ -109,7 +107,7 @@
               -DLLAMA_FATAL_WARNINGS=OFF \
               -DLLAMA_ALL_WARNINGS=OFF \
               -DCMAKE_BUILD_TYPE=Release;
-          cmake --build . --config Release -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server

       - name: Download the dataset
         id: download_dataset
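
All four Dockerfiles and the bench workflow above make the same substitution: `-B` names the build tree at configure time (creating it if needed), and `cmake --build <dir>` replaces the `cd` into it. A minimal sketch of the equivalence, run from the source root (paths and target are taken from the patch itself):

```bash
# Before: the build tree is implied by the shell's working directory
mkdir build
cd build
cmake ..                                        # configure, with the source tree one level up
cmake --build . --config Release --target main

# After: no directory changes; both steps name the build tree explicitly
cmake -B build                                  # configure into ./build, created on demand
cmake --build build --config Release --target main
```
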
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index 79cd7d643..afac89c5b 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -94,15 +94,13 @@ jobs:
       - name: Build
        id: cmake_build
        run: |
-          mkdir build
-          cd build
-          cmake .. \
+          cmake -B build \
              -DLLAMA_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server

       - name: Tests

@@ -143,10 +141,8 @@
       - name: Build
        id: cmake_build
        run: |
-          mkdir build
-          cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

       - name: Python setup
         id: setup_python
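
The `cmake --build <dir>` form used throughout these workflows is build-tool agnostic: it drives whatever generator configured the tree, and `-j`/`--target` are forwarded to the native tool. A sketch of the moving parts (the target and job count mirror the workflows; the generator named in the comment is only the usual Linux default):

```bash
cmake -B build                 # configure; the generator defaults to the platform's (e.g. Unix Makefiles)
cmake --build build \
    --config Release \         # selects the configuration for multi-config generators
    -j "$(nproc)" \            # parallel jobs, passed through to the underlying build tool
    --target server            # build just this target instead of 'all'
```
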
diff --git a/README-sycl.md b/README-sycl.md
index dc98c7b3e..cfa248a95 100644
--- a/README-sycl.md
+++ b/README-sycl.md
@@ -185,9 +185,8 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
 ```sh
 git clone https://github.com/oneapi-src/oneMKL
 cd oneMKL
-mkdir -p buildWithCublas && cd buildWithCublas
-cmake ../ -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-make
+cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
+cmake --build buildWithCublas --config Release
 ```

@@ -227,16 +226,15 @@ Similarly, user targeting Nvidia GPUs should expect at least one SYCL-CUDA device
 source /opt/intel/oneapi/setvars.sh

 # Build LLAMA with MKL BLAS acceleration for intel GPU
-mkdir -p build && cd build

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 # Option 2: Use FP16
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON

-#build all binary
-cmake --build . --config Release -j -v
+# build all binaries
+cmake --build build --config Release -j -v
 ```

 #### Nvidia GPU
@@ -248,16 +246,15 @@
 export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR
 export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR

 # Build LLAMA with Nvidia BLAS acceleration through SYCL
-mkdir -p build && cd build

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 # Option 2: Use FP16
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON

-#build all binary
-cmake --build . --config Release -j -v
+# build all binaries
+cmake --build build --config Release -j -v
 ```

@@ -412,17 +409,15 @@ b. Download & install mingw-w64 make for Windows provided by w64devkit
 On the oneAPI command line window, step into the llama.cpp main directory and run the following:

 ```
-mkdir -p build
-cd build
 @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release

 # Option 2: Or FP16
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON

-make -j
+cmake --build build --config Release -j
 ```

 Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:

diff --git a/README.md b/README.md
index a2aa9214f..514ef3af1 100644
--- a/README.md
+++ b/README.md
@@ -308,6 +308,8 @@ In order to build llama.cpp you have three different options.
     make
     ```

+    **Note**: for `Debug` builds, run `make LLAMA_DEBUG=1`
+
   - On Windows:

     1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
@@ -322,12 +324,26 @@ In order to build llama.cpp you have three different options.
  - Using `CMake`:

    ```bash
-   mkdir build
-   cd build
-   cmake ..
-   cmake --build . --config Release
+   cmake -B build
+   cmake --build build --config Release
    ```

+   **Note**: for `Debug` builds, there are two cases:
+
+   - Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag):
+
+     ```bash
+     cmake -B build -DCMAKE_BUILD_TYPE=Debug
+     cmake --build build
+     ```
+
+   - Multi-config generators (`-G` param set to Visual Studio, Xcode...):
+
+     ```bash
+     cmake -B build -G "Xcode"
+     cmake --build build --config Debug
+     ```
+
 - Using `Zig` (version 0.11 or later):

   Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
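
The single- vs multi-config distinction introduced above can be checked on any configured tree: the generator picked at configure time is recorded in the cache. A quick inspection sketch (the value printed depends on your platform; the two comments show illustrative outcomes):

```bash
cmake -B build
grep '^CMAKE_GENERATOR:' build/CMakeCache.txt
# CMAKE_GENERATOR:INTERNAL=Unix Makefiles   -> single-config: set -DCMAKE_BUILD_TYPE at configure time
# CMAKE_GENERATOR:INTERNAL=Xcode            -> multi-config: pass --config at build time
```
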
@@ -439,10 +455,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake` on Linux:

     ```bash
-    mkdir build
-    cd build
-    cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
-    cmake --build . --config Release
+    cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+    cmake --build build --config Release
     ```

 - #### BLIS
@@ -462,11 +476,9 @@ Building the program with BLAS support may lead to some performance improvements
   - Using manual oneAPI installation:
    By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps:
     ```bash
-    mkdir build
-    cd build
     source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-basekit docker image, only required for manual installation
-    cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
-    cmake --build . --config Release
+    cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+    cmake --build build --config Release
     ```

   - Using oneAPI docker image:
@@ -487,10 +499,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake`:

     ```bash
-    mkdir build
-    cd build
-    cmake .. -DLLAMA_CUDA=ON
-    cmake --build . --config Release
+    cmake -B build -DLLAMA_CUDA=ON
+    cmake --build build --config Release
     ```

 The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:
@@ -517,8 +527,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
     ```bash
     CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
-        cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
-        && cmake --build build -- -j 16
+        cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+        && cmake --build build --config Release -- -j 16
     ```
     On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON"`. However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
@@ -564,15 +574,14 @@ Building the program with BLAS support may lead to some performance improvements
     ```sh
     git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
-    mkdir OpenCL-SDK/build
-    cd OpenCL-SDK/build
-    cmake .. -DBUILD_DOCS=OFF \
+    cd OpenCL-SDK
+    cmake -B build -DBUILD_DOCS=OFF \
       -DBUILD_EXAMPLES=OFF \
       -DBUILD_TESTING=OFF \
       -DOPENCL_SDK_BUILD_SAMPLES=OFF \
       -DOPENCL_SDK_TEST_SAMPLES=OFF
-    cmake --build . --config Release
-    cmake --install . --prefix /some/path
+    cmake --build build
+    cmake --install build --prefix /some/path
     ```

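
`cmake --install <dir>` mirrors `cmake --build <dir>`: it runs the install rules of the named build tree, and `--prefix` overrides `CMAKE_INSTALL_PREFIX` without reconfiguring. A sketch with an illustrative prefix (the prefix path is not taken from the patch):

```bash
cmake -B build
cmake --build build --config Release
cmake --install build --prefix "$HOME/opt/OpenCL-SDK"   # headers and libs land under this prefix

# A consumer project can then resolve the installed package at configure time:
cmake -B build -DCMAKE_PREFIX_PATH="$HOME/opt/OpenCL-SDK"
```
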
@@ -594,23 +603,23 @@ Building the program with BLAS support may lead to some performance improvements
     ```cmd
     set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
     git clone https://github.com/CNugteren/CLBlast.git
-    mkdir CLBlast\build
-    cd CLBlast\build
-    cmake .. -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
-    cmake --build . --config Release
-    cmake --install . --prefix C:/CLBlast
+    cd CLBlast
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
+    cmake --build build --config Release
+    cmake --install build --prefix C:/CLBlast
     ```
+  (note: `--config Release` at build time only matters for multi-config generators such as Visual Studio or multi-config Ninja builds; single-config generators ignore it)
+
   - Unix:
     ```sh
     git clone https://github.com/CNugteren/CLBlast.git
-    mkdir CLBlast/build
-    cd CLBlast/build
-    cmake .. -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
-    cmake --build . --config Release
-    cmake --install . --prefix /some/path
+    cd CLBlast
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
+    cmake --build build --config Release
+    cmake --install build --prefix /some/path
     ```

   Where `/some/path` is where the built library will be installed (default is `/usr/local`).
@@ -624,21 +633,17 @@ Building the program with BLAS support may lead to some performance improvements
     ```
   - CMake (Unix):
     ```sh
-    mkdir build
-    cd build
-    cmake .. -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
-    cmake --build . --config Release
+    cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+    cmake --build build --config Release
     ```
   - CMake (Windows):
     ```cmd
     set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
     git clone https://github.com/ggerganov/llama.cpp
     cd llama.cpp
-    mkdir build
-    cd build
-    cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
-    cmake --build . --config Release
-    cmake --install . --prefix C:/LlamaCPP
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
+    cmake --build build --config Release
+    cmake --install build --prefix C:/LlamaCPP
     ```

 ##### Running Llama with CLBlast
@@ -694,10 +699,8 @@ Building the program with BLAS support may lead to some performance improvements
   Then, build llama.cpp using the cmake command below:

   ```bash
-  mkdir -p build
-  cd build
-  cmake .. -DLLAMA_VULKAN=1
-  cmake --build . --config Release
+  cmake -B build -DLLAMA_VULKAN=1
+  cmake --build build --config Release

   # Test the output binary (with "-ngl 33" to offload all layers to GPU)
   ./bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
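
Note that with `cmake -B build` run from the source root, llama.cpp emits executables under `build/bin/` (the server README below states this for `server`), so the Vulkan smoke test from the hunk above is invoked like this from the source root — the model path is a placeholder:

```bash
cmake -B build -DLLAMA_VULKAN=1
cmake --build build --config Release
# "-ngl 33" offloads all layers to the GPU, as in the README example
./build/bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
```
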
diff --git a/examples/main-cmake-pkg/README.md b/examples/main-cmake-pkg/README.md
index f599fbaec..edf20d8db 100644
--- a/examples/main-cmake-pkg/README.md
+++ b/examples/main-cmake-pkg/README.md
@@ -17,11 +17,9 @@ In this case, CLBlast was already installed so the CMake package is referenced in
 ```cmd
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp
-mkdir build
-cd build
-cmake .. -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
-cmake --build . --config Release
-cmake --install . --prefix C:/LlamaCPP
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
+cmake --build build --config Release
+cmake --install build --prefix C:/LlamaCPP
 ```

 ### Build main-cmake-pkg
@@ -29,9 +27,7 @@ cmake --install . --prefix C:/LlamaCPP

 ```cmd
 cd ..\examples\main-cmake-pkg
-mkdir build
-cd build
-cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
-cmake --build . --config Release
-cmake --install . --prefix C:/MyLlamaApp
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
+cmake --build build --config Release
+cmake --install build --prefix C:/MyLlamaApp
 ```

diff --git a/examples/server/README.md b/examples/server/README.md
index 918ac1295..b96a4444a 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -74,15 +74,18 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 - Using `make`:

   ```bash
-  make
+  make server
   ```

 - Using `CMake`:

   ```bash
-  cmake --build . --config Release
+  cmake -B build
+  cmake --build build --config Release -t server
   ```

+  The binary is at `./build/bin/server`
+
 ## Build with SSL

 `server` can also be built with SSL support using OpenSSL 3
@@ -99,10 +102,8 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 - Using `CMake`:

   ```bash
-  mkdir build
-  cd build
-  cmake .. -DLLAMA_SERVER_SSL=ON
-  make server
+  cmake -B build -DLLAMA_SERVER_SSL=ON
+  cmake --build build --config Release -t server
   ```

 ## Quick Start
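
As a closing sanity check for the server instructions, a minimal build-and-probe sketch — the model path and port are placeholders, and `/health` is assumed to be the server's readiness endpoint:

```bash
cmake -B build
cmake --build build --config Release -t server
./build/bin/server -m models/7B/ggml-model.gguf --port 8080 &
curl http://localhost:8080/health    # reports loading progress, then an "ok" status once the model is ready
```
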