mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-27 12:33:06 +01:00
sycl : Add option to set the SYCL architecture for all targets (#10266)
* Add option to set the SYCL architecture for all targets * Convert GGML_SYCL_HIP_TARGET to the more generic GGML_SYCL_DEVICE_ARCH option * Document that setting GGML_SYCL_DEVICE_ARCH can improve the performance
This commit is contained in:
parent
b3e585988f
commit
2a1507c162
@ -312,12 +312,14 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_
|
|||||||
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
|
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
|
||||||
|
|
||||||
# Build LLAMA with Nvidia BLAS acceleration through SYCL
|
# Build LLAMA with Nvidia BLAS acceleration through SYCL
|
||||||
|
# Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance
|
||||||
|
GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture
|
||||||
|
|
||||||
# Option 1: Use FP32 (recommended for better performance in most cases)
|
# Option 1: Use FP32 (recommended for better performance in most cases)
|
||||||
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
# Option 2: Use FP16
|
# Option 2: Use FP16
|
||||||
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
||||||
|
|
||||||
# build all binaries
|
# build all binaries
|
||||||
cmake --build build --config Release -j -v
|
cmake --build build --config Release -j -v
|
||||||
@ -335,8 +337,9 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE
|
|||||||
|
|
||||||
## AMD
|
## AMD
|
||||||
# Use FP32, FP16 is not supported
|
# Use FP32, FP16 is not supported
|
||||||
# Find your GGML_SYCL_HIP_TARGET with rocminfo, under the key 'Name:'
|
# Find your GGML_SYCL_DEVICE_ARCH with rocminfo, under the key 'Name:'
|
||||||
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_HIP_TARGET=${GGML_SYCL_HIP_TARGET} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
GGML_SYCL_DEVICE_ARCH=gfx90a # Example architecture
|
||||||
|
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
# build all binaries
|
# build all binaries
|
||||||
cmake --build build --config Release -j -v
|
cmake --build build --config Release -j -v
|
||||||
@ -646,6 +649,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
|
|||||||
|--------------------|---------------------------------------|---------------------------------------------|
|
|--------------------|---------------------------------------|---------------------------------------------|
|
||||||
| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized models|
|
| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized models|
|
||||||
| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. |
|
| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. |
|
||||||
|
| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
|
||||||
| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
|
| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
|
||||||
| CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
|
| CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
|
||||||
| CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |
|
| CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |
|
||||||
|
@ -164,6 +164,8 @@ option(GGML_SYCL "ggml: use SYCL"
|
|||||||
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
||||||
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
||||||
"ggml: sycl target device")
|
"ggml: sycl target device")
|
||||||
|
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
||||||
|
"ggml: sycl device architecture")
|
||||||
|
|
||||||
# extra artifacts
|
# extra artifacts
|
||||||
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
|
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
|
||||||
|
@ -72,10 +72,14 @@ else()
|
|||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
||||||
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
|
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
|
||||||
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
||||||
if (GGML_SYCL_HIP_TARGET STREQUAL "")
|
if (NOT GGML_SYCL_DEVICE_ARCH)
|
||||||
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_HIP_TARGET has not been set.")
|
# FATAL_ERROR stops configuration; a bare ERROR is not a message() mode and would only be printed as text.
message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
|
||||||
endif()
|
endif()
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${GGML_SYCL_HIP_TARGET}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa")
|
||||||
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
|
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (GGML_SYCL_DEVICE_ARCH)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
Loading…
Reference in New Issue
Block a user