mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 12:21:40 +01:00
ci : enable -Werror for CUDA builds (#5579)
* cmake : pass -Werror through -Xcompiler ggml-ci * make, cmake : enable CUDA errors on warnings ggml-ci
This commit is contained in:
parent
68a6b98b3c
commit
d0e3ce51f4
@ -145,14 +145,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
|
|||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
include(CheckCXXCompilerFlag)
|
include(CheckCXXCompilerFlag)
|
||||||
|
|
||||||
if (LLAMA_FATAL_WARNINGS)
|
|
||||||
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
||||||
add_compile_options(-Werror)
|
|
||||||
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
|
||||||
add_compile_options(/WX)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# enable libstdc++ assertions for debug builds
|
# enable libstdc++ assertions for debug builds
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||||
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
|
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
|
||||||
@ -747,15 +739,24 @@ function(get_flags CCID CCVER)
|
|||||||
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
|
if (LLAMA_FATAL_WARNINGS)
|
||||||
|
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||||
|
list(APPEND C_FLAGS -Werror)
|
||||||
|
list(APPEND CXX_FLAGS -Werror)
|
||||||
|
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||||
|
add_compile_options(/WX)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if (LLAMA_ALL_WARNINGS)
|
if (LLAMA_ALL_WARNINGS)
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||||
set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
||||||
-Werror=implicit-int -Werror=implicit-function-declaration)
|
-Werror=implicit-int -Werror=implicit-function-declaration)
|
||||||
set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
||||||
|
|
||||||
set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS})
|
list(APPEND C_FLAGS ${WARNING_FLAGS})
|
||||||
set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS})
|
list(APPEND CXX_FLAGS ${WARNING_FLAGS})
|
||||||
|
|
||||||
get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
||||||
|
|
||||||
@ -773,6 +774,10 @@ set(CUDA_CXX_FLAGS "")
|
|||||||
if (LLAMA_CUBLAS)
|
if (LLAMA_CUBLAS)
|
||||||
set(CUDA_FLAGS -use_fast_math)
|
set(CUDA_FLAGS -use_fast_math)
|
||||||
|
|
||||||
|
if (LLAMA_FATAL_WARNINGS)
|
||||||
|
list(APPEND CUDA_FLAGS -Werror all-warnings)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
|
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
|
||||||
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
|
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
|
||||||
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
|
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
|
||||||
|
3
Makefile
3
Makefile
@ -385,6 +385,9 @@ ifdef LLAMA_CUBLAS
|
|||||||
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
MK_NVCCFLAGS += -use_fast_math
|
MK_NVCCFLAGS += -use_fast_math
|
||||||
|
ifdef LLAMA_FATAL_WARNINGS
|
||||||
|
MK_NVCCFLAGS += -Werror all-warnings
|
||||||
|
endif # LLAMA_FATAL_WARNINGS
|
||||||
ifndef JETSON_EOL_MODULE_DETECT
|
ifndef JETSON_EOL_MODULE_DETECT
|
||||||
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
|
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
|
||||||
endif # JETSON_EOL_MODULE_DETECT
|
endif # JETSON_EOL_MODULE_DETECT
|
||||||
|
50
ggml-cuda.cu
50
ggml-cuda.cu
@ -651,18 +651,18 @@ static __device__ __forceinline__ float2 warp_reduce_sum(float2 a) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
|
//static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
|
||||||
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
|
//#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
|
||||||
#pragma unroll
|
//#pragma unroll
|
||||||
for (int mask = 16; mask > 0; mask >>= 1) {
|
// for (int mask = 16; mask > 0; mask >>= 1) {
|
||||||
a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
|
// a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
|
||||||
}
|
// }
|
||||||
return a;
|
// return a;
|
||||||
#else
|
//#else
|
||||||
(void) a;
|
// (void) a;
|
||||||
NO_DEVICE_CODE;
|
// NO_DEVICE_CODE;
|
||||||
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
|
//#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
|
||||||
}
|
//}
|
||||||
|
|
||||||
static __device__ __forceinline__ float warp_reduce_max(float x) {
|
static __device__ __forceinline__ float warp_reduce_max(float x) {
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
@ -672,18 +672,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) {
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
//static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
||||||
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
//#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
||||||
#pragma unroll
|
//#pragma unroll
|
||||||
for (int mask = 16; mask > 0; mask >>= 1) {
|
// for (int mask = 16; mask > 0; mask >>= 1) {
|
||||||
x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
|
// x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
|
||||||
}
|
// }
|
||||||
return x;
|
// return x;
|
||||||
#else
|
//#else
|
||||||
(void) x;
|
// (void) x;
|
||||||
NO_DEVICE_CODE;
|
// NO_DEVICE_CODE;
|
||||||
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
//#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
||||||
}
|
//}
|
||||||
|
|
||||||
static __device__ __forceinline__ float op_repeat(const float a, const float b) {
|
static __device__ __forceinline__ float op_repeat(const float a, const float b) {
|
||||||
return b;
|
return b;
|
||||||
@ -4641,10 +4641,12 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
|
|||||||
const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
|
const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
|
||||||
return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
|
return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
|
||||||
#else
|
#else
|
||||||
|
(void) ksigns64;
|
||||||
assert(false);
|
assert(false);
|
||||||
return 0.f;
|
return 0.f;
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
|
(void) ksigns64;
|
||||||
assert(false);
|
assert(false);
|
||||||
return 0.f;
|
return 0.f;
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user