Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-25 13:58:46 +01:00)
cmake : fix CMake requirement for CUDA (#7821)
commit 864a99e7a0
parent fd5ea0f897
@@ -402,12 +402,26 @@ if (LLAMA_CUBLAS)
 endif()
 
 if (LLAMA_CUDA)
-    cmake_minimum_required(VERSION 3.17)
+    cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
 
     find_package(CUDAToolkit)
     if (CUDAToolkit_FOUND)
         message(STATUS "CUDA found")
 
+        if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+            # 52 == lowest CUDA 12 standard
+            # 60 == f16 CUDA intrinsics
+            # 61 == integer CUDA intrinsics
+            # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
+            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
+                set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
+            else()
+                set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
+                #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
+            endif()
+        endif()
+        message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
+
         enable_language(CUDA)
 
         set(GGML_HEADERS_CUDA ggml-cuda.h)
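Note that the added block is guarded by "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)", so a value supplied at configure time still takes precedence over these defaults. A minimal configure sketch (the build directory name and the architecture value 86 are illustrative, not part of this commit):

    cmake -B build -DLLAMA_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=86
    cmake --build build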
@@ -472,21 +486,6 @@ if (LLAMA_CUDA)
         else()
             set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
         endif()
-
-        if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-            # 52 == lowest CUDA 12 standard
-            # 60 == f16 CUDA intrinsics
-            # 61 == integer CUDA intrinsics
-            # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
-            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
-                set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
-            else()
-                set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
-                #set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work
-            endif()
-        endif()
-        message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
-
     else()
         message(WARNING "CUDA not found")
     endif()
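For context on the version bump: the CMAKE_CUDA_ARCHITECTURES variable only exists from CMake 3.18 onward, and it is consulted when the CUDA language is enabled, which appears to be why this commit moves the architecture selection ahead of enable_language(CUDA) and removes the old copy that ran after it. A minimal standalone sketch of that ordering (the project name and architecture list are illustrative, not from the commit):

    cmake_minimum_required(VERSION 3.18) # CMAKE_CUDA_ARCHITECTURES requires 3.18+
    project(cuda_arch_demo LANGUAGES CXX)

    # Decide the architecture list before enabling the CUDA language,
    # but only if the user did not pass -DCMAKE_CUDA_ARCHITECTURES=... themselves.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES "52;61;70")
    endif()

    enable_language(CUDA)
    message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")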