ci : enable -Werror for CUDA builds (#5579)

* cmake : pass -Werror through -Xcompiler

ggml-ci

* make, cmake : enable CUDA errors on warnings

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-02-19 14:45:41 +02:00 committed by GitHub
parent 68a6b98b3c
commit d0e3ce51f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 49 additions and 39 deletions

View File

@ -145,14 +145,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
if (LLAMA_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Werror)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/WX)
endif()
endif()
# enable libstdc++ assertions for debug builds # enable libstdc++ assertions for debug builds
if (CMAKE_SYSTEM_NAME MATCHES "Linux") if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
@ -747,15 +739,24 @@ function(get_flags CCID CCVER)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction() endfunction()
if (LLAMA_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
list(APPEND C_FLAGS -Werror)
list(APPEND CXX_FLAGS -Werror)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/WX)
endif()
endif()
if (LLAMA_ALL_WARNINGS) if (LLAMA_ALL_WARNINGS)
if (NOT MSVC) if (NOT MSVC)
set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
-Werror=implicit-int -Werror=implicit-function-declaration) -Werror=implicit-int -Werror=implicit-function-declaration)
set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS}) list(APPEND C_FLAGS ${WARNING_FLAGS})
set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS}) list(APPEND CXX_FLAGS ${WARNING_FLAGS})
get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
@ -773,6 +774,10 @@ set(CUDA_CXX_FLAGS "")
if (LLAMA_CUBLAS) if (LLAMA_CUBLAS)
set(CUDA_FLAGS -use_fast_math) set(CUDA_FLAGS -use_fast_math)
if (LLAMA_FATAL_WARNINGS)
list(APPEND CUDA_FLAGS -Werror all-warnings)
endif()
if (LLAMA_ALL_WARNINGS AND NOT MSVC) if (LLAMA_ALL_WARNINGS AND NOT MSVC)
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")

View File

@ -385,6 +385,9 @@ ifdef LLAMA_CUBLAS
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
OBJS += ggml-cuda.o OBJS += ggml-cuda.o
MK_NVCCFLAGS += -use_fast_math MK_NVCCFLAGS += -use_fast_math
ifdef LLAMA_FATAL_WARNINGS
MK_NVCCFLAGS += -Werror all-warnings
endif # LLAMA_FATAL_WARNINGS
ifndef JETSON_EOL_MODULE_DETECT ifndef JETSON_EOL_MODULE_DETECT
MK_NVCCFLAGS += --forward-unknown-to-host-compiler MK_NVCCFLAGS += --forward-unknown-to-host-compiler
endif # JETSON_EOL_MODULE_DETECT endif # JETSON_EOL_MODULE_DETECT

View File

@ -651,18 +651,18 @@ static __device__ __forceinline__ float2 warp_reduce_sum(float2 a) {
return a; return a;
} }
static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) { //static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL //#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
#pragma unroll //#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) { // for (int mask = 16; mask > 0; mask >>= 1) {
a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32)); // a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
} // }
return a; // return a;
#else //#else
(void) a; // (void) a;
NO_DEVICE_CODE; // NO_DEVICE_CODE;
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL //#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
} //}
static __device__ __forceinline__ float warp_reduce_max(float x) { static __device__ __forceinline__ float warp_reduce_max(float x) {
#pragma unroll #pragma unroll
@ -672,18 +672,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) {
return x; return x;
} }
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { //static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX //#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
#pragma unroll //#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) { // for (int mask = 16; mask > 0; mask >>= 1) {
x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32)); // x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
} // }
return x; // return x;
#else //#else
(void) x; // (void) x;
NO_DEVICE_CODE; // NO_DEVICE_CODE;
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX //#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
} //}
static __device__ __forceinline__ float op_repeat(const float a, const float b) { static __device__ __forceinline__ float op_repeat(const float a, const float b) {
return b; return b;
@ -4641,10 +4641,12 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f; const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2); return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
#else #else
(void) ksigns64;
assert(false); assert(false);
return 0.f; return 0.f;
#endif #endif
#else #else
(void) ksigns64;
assert(false); assert(false);
return 0.f; return 0.f;
#endif #endif