From 3814a07392d2bdc22911652bc7c2f9bdb0ce042e Mon Sep 17 00:00:00 2001
From: AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
Date: Mon, 11 Mar 2024 01:13:57 +0000
Subject: [PATCH] [SYCL] Add support for SYCL Nvidia target (#5738)

* Add support for nvidia target in CMake

* Update sycl read-me for Nvidia target

* Fix errors
---
 CMakeLists.txt | 14 +++++++++++++-
 README-sycl.md | 26 ++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9309ca6bb..00a26391a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -116,6 +116,7 @@ option(LLAMA_MPI "llama: use MPI"
 option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
 option(LLAMA_SYCL "llama: use SYCL" OFF)
 option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
+set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
 option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
 
 option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
@@ -534,6 +535,10 @@ if (LLAMA_HIPBLAS)
 endif()
 
 if (LLAMA_SYCL)
+    if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
+        message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
+    endif()
+
     if ( NOT DEFINED ENV{ONEAPI_ROOT})
         message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
     endif()
@@ -555,6 +560,9 @@ if (LLAMA_SYCL)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
+    if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
+    endif()
 
     set(GGML_HEADERS_SYCL ggml-sycl.h)
     set(GGML_SOURCES_SYCL ggml-sycl.cpp)
@@ -562,7 +570,11 @@ if (LLAMA_SYCL)
     if (WIN32)
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
     else()
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
+        if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
+            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
+        elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
+            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
+        endif()
     endif()
 endif()
 
diff --git a/README-sycl.md b/README-sycl.md
index 85eb16f2b..9359a9490 100644
--- a/README-sycl.md
+++ b/README-sycl.md
@@ -73,6 +73,29 @@ For iGPU, please make sure the shared memory from host memory is enough. For lla
 
 For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+.
 
+## Nvidia GPU
+
+### Verified
+
+|Nvidia GPU| Status | Verified Model|
+|-|-|-|
+|Ampere Series| Support| A100|
+
+### oneMKL
+
+The current oneMKL release does not contain the oneMKL cuBLAS backend.
+As a result, for Nvidia GPUs, oneMKL must be built from source.
+
+```
+git clone https://github.com/oneapi-src/oneMKL
+cd oneMKL
+mkdir build
+cd build
+cmake -G Ninja .. -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON
+ninja
+# Add paths as necessary
+```
+
 ## Docker
 
 Note:
@@ -186,6 +209,9 @@ source /opt/intel/oneapi/setvars.sh
 # Or, for FP32:
 cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 
+# For Nvidia GPUs
+cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
 # Build example/main only
 #cmake --build . --config Release --target main
 