cmake : fix VULKAN and ROCm builds (#5525)

* cmake : fix VULKAN and ROCm builds

* cmake : fix (cont)

* vulkan : fix compile warnings

ggml-ci

* cmake : fix

ggml-ci

* cmake : minor

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-02-16 19:05:56 +02:00 committed by GitHub
parent d2819d5577
commit 5bf2b94dd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 205 additions and 186 deletions

View File

@ -112,17 +112,14 @@ option(LLAMA_MPI "llama: use MPI"
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
option(LLAMA_SYCL "llama: use SYCL" OFF) option(LLAMA_SYCL "llama: use SYCL" OFF)
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON) option(LLAMA_BUILD_SERVER "llama: build server example" ON)
# add perf arguments # add perf arguments
option(LLAMA_PERF "llama: enable perf" OFF) option(LLAMA_PERF "llama: enable perf" OFF)
if (LLAMA_PERF)
add_definitions(-DGGML_PERF)
endif()
# Required for relocatable CMake package # Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
@ -130,6 +127,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
# #
# Compile flags # Compile flags
# #
if (LLAMA_SYCL) if (LLAMA_SYCL)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
else() else()
@ -140,6 +138,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD_REQUIRED true)
set(THREADS_PREFER_PTHREAD_FLAG ON) set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
@ -298,14 +297,17 @@ if (LLAMA_BLAS)
endif() endif()
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
add_compile_options(${BLAS_LINKER_FLAGS}) add_compile_options(${BLAS_LINKER_FLAGS})
add_compile_definitions(GGML_USE_OPENBLAS) add_compile_definitions(GGML_USE_OPENBLAS)
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel")) if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL) add_compile_definitions(GGML_BLAS_USE_MKL)
endif() endif()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
else() else()
message(WARNING "BLAS not found, please refer to " message(WARNING "BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
@ -330,9 +332,6 @@ if (LLAMA_CUBLAS)
set(GGML_SOURCES_CUDA ggml-cuda.cu) set(GGML_SOURCES_CUDA ggml-cuda.cu)
add_compile_definitions(GGML_USE_CUBLAS) add_compile_definitions(GGML_USE_CUBLAS)
# if (LLAMA_CUDA_CUBLAS)
# add_compile_definitions(GGML_CUDA_CUBLAS)
# endif()
if (LLAMA_CUDA_FORCE_DMMV) if (LLAMA_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV) add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif() endif()
@ -387,15 +386,20 @@ if (LLAMA_MPI)
find_package(MPI) find_package(MPI)
if (MPI_C_FOUND) if (MPI_C_FOUND)
message(STATUS "MPI found") message(STATUS "MPI found")
set(GGML_HEADERS_MPI ggml-mpi.h) set(GGML_HEADERS_MPI ggml-mpi.h)
set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h) set(GGML_SOURCES_MPI ggml-mpi.c)
add_compile_definitions(GGML_USE_MPI) add_compile_definitions(GGML_USE_MPI)
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS}) add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
if (NOT MSVC) if (NOT MSVC)
add_compile_options(-Wno-cast-qual) add_compile_options(-Wno-cast-qual)
endif() endif()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES}) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS}) set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
# Even if you're only using the C header, C++ programs may bring in MPI # Even if you're only using the C header, C++ programs may bring in MPI
# C++ functions, so more linkage is needed # C++ functions, so more linkage is needed
if (MPI_CXX_FOUND) if (MPI_CXX_FOUND)
@ -427,31 +431,28 @@ if (LLAMA_VULKAN)
if (Vulkan_FOUND) if (Vulkan_FOUND)
message(STATUS "Vulkan found") message(STATUS "Vulkan found")
add_library(ggml-vulkan OBJECT ggml-vulkan.cpp ggml-vulkan.h) set(GGML_HEADERS_VULKAN ggml-vulkan.h)
if (BUILD_SHARED_LIBS) set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)
set_target_properties(ggml-vulkan PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
add_compile_definitions(GGML_USE_VULKAN) add_compile_definitions(GGML_USE_VULKAN)
if (LLAMA_VULKAN_CHECK_RESULTS) if (LLAMA_VULKAN_CHECK_RESULTS)
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_CHECK_RESULTS) add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
endif() endif()
if (LLAMA_VULKAN_DEBUG) if (LLAMA_VULKAN_DEBUG)
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_DEBUG) add_compile_definitions(GGML_VULKAN_DEBUG)
endif() endif()
if (LLAMA_VULKAN_VALIDATE) if (LLAMA_VULKAN_VALIDATE)
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_VALIDATE) add_compile_definitions(GGML_VULKAN_VALIDATE)
endif() endif()
if (LLAMA_VULKAN_RUN_TESTS) if (LLAMA_VULKAN_RUN_TESTS)
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_RUN_TESTS) add_compile_definitions(GGML_VULKAN_RUN_TESTS)
endif() endif()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-vulkan) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan)
else() else()
message(WARNING "Vulkan not found") message(WARNING "Vulkan not found")
endif() endif()
@ -463,43 +464,45 @@ if (LLAMA_HIPBLAS)
if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang") if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang") message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
endif() endif()
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
endif() endif()
find_package(hip) find_package(hip REQUIRED)
find_package(hipblas) find_package(hipblas REQUIRED)
find_package(rocblas) find_package(rocblas REQUIRED)
if (${hipblas_FOUND} AND ${hip_FOUND})
message(STATUS "HIP and hipBLAS found") message(STATUS "HIP and hipBLAS found")
set(GGML_HEADERS_ROCM ggml-cuda.h)
set(GGML_SOURCES_ROCM ggml-cuda.cu)
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS) add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
if (LLAMA_HIP_UMA) if (LLAMA_HIP_UMA)
add_compile_definitions(GGML_HIP_UMA) add_compile_definitions(GGML_HIP_UMA)
endif() endif()
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
if (BUILD_SHARED_LIBS)
set_target_properties(ggml-rocm PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
if (LLAMA_CUDA_FORCE_DMMV) if (LLAMA_CUDA_FORCE_DMMV)
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV) add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif() endif()
if (LLAMA_CUDA_FORCE_MMQ) if (LLAMA_CUDA_FORCE_MMQ)
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_MMQ) add_compile_definitions(GGML_CUDA_FORCE_MMQ)
endif() endif()
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX) set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
if (LLAMA_STATIC) if (LLAMA_STATIC)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm") message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
endif() endif()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-rocm)
else() set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
endif()
endif() endif()
if (LLAMA_SYCL) if (LLAMA_SYCL)
@ -509,10 +512,14 @@ if (LLAMA_SYCL)
#todo: AOT #todo: AOT
find_package(IntelSYCL REQUIRED) find_package(IntelSYCL REQUIRED)
message(STATUS "SYCL found")
add_compile_definitions(GML_USE_SYCL)
if (LLAMA_SYCL_F16) if (LLAMA_SYCL_F16)
add_compile_definitions(GGML_SYCL_F16) add_compile_definitions(GGML_SYCL_F16)
endif() endif()
add_compile_definitions(GGML_USE_SYCL)
add_compile_options(-I./) #include DPCT add_compile_options(-I./) #include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR}) add_compile_options(-I/${SYCL_INCLUDE_DIR})
@ -521,7 +528,7 @@ if (LLAMA_SYCL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
set(GGML_HEADERS_SYCL ggml.h ggml-sycl.h) set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp) set(GGML_SOURCES_SYCL ggml-sycl.cpp)
if (WIN32) if (WIN32)
@ -677,7 +684,9 @@ if (LLAMA_KOMPUTE)
# Add the stamp to the main sources to ensure dependency tracking # Add the stamp to the main sources to ensure dependency tracking
set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
add_compile_definitions(GGML_USE_KOMPUTE) add_compile_definitions(GGML_USE_KOMPUTE)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
else() else()
@ -685,6 +694,18 @@ if (LLAMA_KOMPUTE)
endif() endif()
endif() endif()
if (LLAMA_CPU_HBM)
find_library(memkind memkind REQUIRED)
add_compile_definitions(GGML_USE_CPU_HBM)
target_link_libraries(ggml PUBLIC memkind)
endif()
if (LLAMA_PERF)
add_compile_definitions(GGML_PERF)
endif()
function(get_flags CCID CCVER) function(get_flags CCID CCVER)
set(C_FLAGS "") set(C_FLAGS "")
set(CXX_FLAGS "") set(CXX_FLAGS "")
@ -821,6 +842,7 @@ execute_process(
ERROR_VARIABLE output ERROR_VARIABLE output
OUTPUT_QUIET OUTPUT_QUIET
) )
if (output MATCHES "dyld-1015\.7") if (output MATCHES "dyld-1015\.7")
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
endif() endif()
@ -1027,11 +1049,6 @@ endif()
# ggml # ggml
if (GGML_USE_CPU_HBM)
add_definitions(-DGGML_USE_CPU_HBM)
find_library(memkind memkind REQUIRED)
endif()
add_library(ggml OBJECT add_library(ggml OBJECT
ggml.c ggml.c
ggml.h ggml.h
@ -1048,16 +1065,17 @@ add_library(ggml OBJECT
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA} ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE} ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
) )
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES}) target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
target_compile_features (ggml PUBLIC c_std_11) # don't bump target_compile_features (ggml PUBLIC c_std_11) # don't bump
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
if (GGML_USE_CPU_HBM)
target_link_libraries(ggml PUBLIC memkind)
endif()
add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>) add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
if (BUILD_SHARED_LIBS) if (BUILD_SHARED_LIBS)
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>) add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
@ -1074,6 +1092,7 @@ add_library(llama
target_include_directories(llama PUBLIC .) target_include_directories(llama PUBLIC .)
target_compile_features (llama PUBLIC cxx_std_11) # don't bump target_compile_features (llama PUBLIC cxx_std_11) # don't bump
target_link_libraries(llama PRIVATE target_link_libraries(llama PRIVATE
ggml ggml
${LLAMA_EXTRA_LIBS} ${LLAMA_EXTRA_LIBS}

View File

@ -1091,7 +1091,7 @@ static void ggml_vk_print_gpu_info(size_t idx) {
} }
} }
void ggml_vk_instance_init() { static void ggml_vk_instance_init() {
if (vk_instance_initialized) { if (vk_instance_initialized) {
return; return;
} }
@ -1150,7 +1150,7 @@ void ggml_vk_instance_init() {
vk_instance_initialized = true; vk_instance_initialized = true;
} }
void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) { static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
GGML_ASSERT(idx < vk_instance.device_indices.size()); GGML_ASSERT(idx < vk_instance.device_indices.size());
size_t dev_num = vk_instance.device_indices[idx]; size_t dev_num = vk_instance.device_indices[idx];
#ifdef GGML_VULKAN_DEBUG #ifdef GGML_VULKAN_DEBUG
@ -4556,13 +4556,13 @@ static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) {
} }
} }
GGML_CALL int ggml_vk_get_device_count() { GGML_CALL static int ggml_vk_get_device_count() {
ggml_vk_instance_init(); ggml_vk_instance_init();
return vk_instance.device_indices.size(); return vk_instance.device_indices.size();
} }
GGML_CALL void ggml_vk_get_device_description(int device, char * description, size_t description_size) { GGML_CALL static void ggml_vk_get_device_description(int device, char * description, size_t description_size) {
ggml_vk_instance_init(); ggml_vk_instance_init();
std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices(); std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
@ -4580,7 +4580,7 @@ void ggml_vk_init_cpu_assist() {
std::cerr << "ggml_vulkan: Found " << ggml_vk_get_device_count() << " Vulkan devices:" << std::endl; std::cerr << "ggml_vulkan: Found " << ggml_vk_get_device_count() << " Vulkan devices:" << std::endl;
for (size_t i = 0; i < ggml_vk_get_device_count(); i++) { for (int i = 0; i < ggml_vk_get_device_count(); i++) {
ggml_vk_print_gpu_info(i); ggml_vk_print_gpu_info(i);
} }
// Initialize the first backend to make sure CPU matrix multiplications can be offloaded. // Initialize the first backend to make sure CPU matrix multiplications can be offloaded.
@ -5267,7 +5267,7 @@ GGML_CALL void ggml_backend_vk_get_device_description(int device, char * descrip
} }
GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) { GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
GGML_ASSERT(device < vk_instance.device_indices.size()); GGML_ASSERT(device < (int) vk_instance.device_indices.size());
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]]; vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];