mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
[SYCL] Add support for SYCL Nvidia target (#5738)
* Add support for nvidia target in CMake * Update sycl read-me for Nvidia target * Fix errors
This commit is contained in:
parent
bb6d00bbf9
commit
3814a07392
@ -116,6 +116,7 @@ option(LLAMA_MPI "llama: use MPI"
|
|||||||
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
|
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
|
||||||
option(LLAMA_SYCL "llama: use SYCL" OFF)
|
option(LLAMA_SYCL "llama: use SYCL" OFF)
|
||||||
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
|
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
|
||||||
|
set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
|
||||||
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
|
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
|
||||||
|
|
||||||
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
||||||
@ -534,6 +535,10 @@ if (LLAMA_HIPBLAS)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_SYCL)
|
if (LLAMA_SYCL)
|
||||||
|
if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
|
||||||
|
message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
|
||||||
|
endif()
|
||||||
|
|
||||||
if ( NOT DEFINED ENV{ONEAPI_ROOT})
|
if ( NOT DEFINED ENV{ONEAPI_ROOT})
|
||||||
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
|
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
|
||||||
endif()
|
endif()
|
||||||
@ -555,6 +560,9 @@ if (LLAMA_SYCL)
|
|||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
||||||
|
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
||||||
|
endif()
|
||||||
|
|
||||||
set(GGML_HEADERS_SYCL ggml-sycl.h)
|
set(GGML_HEADERS_SYCL ggml-sycl.h)
|
||||||
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
|
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
|
||||||
@ -562,7 +570,11 @@ if (LLAMA_SYCL)
|
|||||||
if (WIN32)
|
if (WIN32)
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
|
||||||
else()
|
else()
|
||||||
|
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
||||||
|
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
|
||||||
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -73,6 +73,29 @@ For iGPU, please make sure the shared memory from host memory is enough. For lla
|
|||||||
|
|
||||||
For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+.
|
For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+.
|
||||||
|
|
||||||
|
## Nvidia GPU
|
||||||
|
|
||||||
|
### Verified
|
||||||
|
|
||||||
|
|Nvidia GPU| Status | Verified Model|
|
||||||
|
|-|-|-|
|
||||||
|
|Ampere Series| Support| A100|
|
||||||
|
|
||||||
|
### oneMKL
|
||||||
|
|
||||||
|
The current oneMKL release does not include the oneMKL cuBLAS backend.
|
||||||
|
As a result, for Nvidia GPUs, oneMKL must be built from source.
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://github.com/oneapi-src/oneMKL
|
||||||
|
cd oneMKL
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -G Ninja .. -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON
|
||||||
|
ninja
|
||||||
|
# Add paths as necessary
|
||||||
|
```
|
||||||
|
|
||||||
## Docker
|
## Docker
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
@ -186,6 +209,9 @@ source /opt/intel/oneapi/setvars.sh
|
|||||||
# Or, for FP32:
|
# Or, for FP32:
|
||||||
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
|
# For Nvidia GPUs
|
||||||
|
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
# Build example/main only
|
# Build example/main only
|
||||||
#cmake --build . --config Release --target main
|
#cmake --build . --config Release --target main
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user