mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
build : detect host compiler and cuda compiler separately (#4414)
This commit is contained in:
parent
9fb13f9584
commit
70f806b821
@ -15,6 +15,9 @@ indent_size = 4
|
|||||||
[Makefile]
|
[Makefile]
|
||||||
indent_style = tab
|
indent_style = tab
|
||||||
|
|
||||||
|
[scripts/*.mk]
|
||||||
|
indent_style = tab
|
||||||
|
|
||||||
[prompts/*.txt]
|
[prompts/*.txt]
|
||||||
insert_final_newline = unset
|
insert_final_newline = unset
|
||||||
|
|
||||||
|
118
CMakeLists.txt
118
CMakeLists.txt
@ -397,58 +397,103 @@ if (LLAMA_HIPBLAS)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_ALL_WARNINGS)
|
function(get_flags CCID CCVER)
|
||||||
if (NOT MSVC)
|
set(C_FLAGS "")
|
||||||
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
set(CXX_FLAGS "")
|
||||||
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
|
|
||||||
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
|
|
||||||
set(host_cxx_flags "")
|
|
||||||
|
|
||||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
if (CCID MATCHES "Clang")
|
||||||
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
|
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
|
||||||
set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)
|
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
|
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
||||||
(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
|
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
||||||
)
|
)
|
||||||
set(c_flags ${c_flags} -Wdouble-promotion)
|
set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
|
||||||
endif()
|
endif()
|
||||||
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
elseif (CCID STREQUAL "GNU")
|
||||||
set(c_flags ${c_flags} -Wdouble-promotion)
|
set(C_FLAGS -Wdouble-promotion)
|
||||||
set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
|
set(CXX_FLAGS -Wno-array-bounds)
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
|
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
|
||||||
set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
|
set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
|
||||||
endif()
|
endif()
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
|
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
||||||
set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
|
set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
|
||||||
|
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
if (LLAMA_ALL_WARNINGS)
|
||||||
|
if (NOT MSVC)
|
||||||
|
set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||||
|
set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
||||||
|
-Werror=implicit-int -Werror=implicit-function-declaration)
|
||||||
|
set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
||||||
|
|
||||||
|
set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS})
|
||||||
|
set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS})
|
||||||
|
|
||||||
|
# Quote both arguments so an empty compiler ID or version is still passed as an (empty) argument; get_flags declares two required parameters.
get_flags("${CMAKE_CXX_COMPILER_ID}" "${CMAKE_CXX_COMPILER_VERSION}")
|
||||||
|
|
||||||
|
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
|
||||||
|
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
|
||||||
else()
|
else()
|
||||||
# todo : msvc
|
# todo : msvc
|
||||||
|
set(C_FLAGS "")
|
||||||
|
set(CXX_FLAGS "")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_CUBLAS)
|
||||||
|
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
|
||||||
|
if (NOT MSVC)
|
||||||
|
set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(c_flags ${c_flags} ${warning_flags})
|
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
|
||||||
set(cxx_flags ${cxx_flags} ${warning_flags})
|
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
|
||||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
|
endif()
|
||||||
|
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${NVCC_CMD} -Xcompiler --version
|
||||||
|
OUTPUT_VARIABLE CUDA_CCFULLVER
|
||||||
|
ERROR_QUIET
|
||||||
|
)
|
||||||
|
|
||||||
|
if (NOT CUDA_CCFULLVER MATCHES clang)
|
||||||
|
set(CUDA_CCID "GNU")
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
|
||||||
|
OUTPUT_VARIABLE CUDA_CCVER
|
||||||
|
ERROR_QUIET
|
||||||
|
)
|
||||||
|
else()
|
||||||
|
if (CUDA_CCFULLVER MATCHES Apple)
|
||||||
|
set(CUDA_CCID "AppleClang")
|
||||||
|
else()
|
||||||
|
set(CUDA_CCID "Clang")
|
||||||
|
endif()
|
||||||
|
# Extract the numeric version from the host compiler's `--version` output. The input is quoted so it is passed as one string rather than being list-split on any `;` it contains.
string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER "${CUDA_CCFULLVER}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
|
||||||
|
|
||||||
|
# The nvcc probes above run with ERROR_QUIET, so CUDA_CCVER may be empty if they fail; quoting keeps the call at two arguments instead of aborting configuration.
get_flags("${CUDA_CCID}" "${CUDA_CCVER}")
|
||||||
|
list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
|
||||||
|
if (NOT CUDA_CXX_FLAGS STREQUAL "")
|
||||||
|
set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT MSVC)
|
|
||||||
set(cuda_flags -Wno-pedantic)
|
|
||||||
endif()
|
|
||||||
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
|
|
||||||
|
|
||||||
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
|
|
||||||
if (NOT cuda_host_flags STREQUAL "")
|
|
||||||
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
|
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
||||||
|
|
||||||
@ -471,6 +516,7 @@ endif()
|
|||||||
execute_process(
|
execute_process(
|
||||||
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
|
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
|
||||||
ERROR_VARIABLE output
|
ERROR_VARIABLE output
|
||||||
|
OUTPUT_QUIET
|
||||||
)
|
)
|
||||||
if (output MATCHES "dyld-1015\.7")
|
if (output MATCHES "dyld-1015\.7")
|
||||||
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
|
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
|
||||||
|
108
Makefile
108
Makefile
@ -26,20 +26,6 @@ ifndef UNAME_M
|
|||||||
UNAME_M := $(shell uname -m)
|
UNAME_M := $(shell uname -m)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq '' '$(findstring clang,$(shell $(CC) --version))'
|
|
||||||
CC_IS_GCC=1
|
|
||||||
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
|
||||||
else
|
|
||||||
CC_IS_CLANG=1
|
|
||||||
ifeq '' '$(findstring Apple,$(shell $(CC) --version))'
|
|
||||||
CC_IS_LLVM_CLANG=1
|
|
||||||
else
|
|
||||||
CC_IS_APPLE_CLANG=1
|
|
||||||
endif
|
|
||||||
CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
|
|
||||||
| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Mac OS + Arm can report x86_64
|
# Mac OS + Arm can report x86_64
|
||||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
@ -122,8 +108,8 @@ MK_CXXFLAGS = -std=c++11 -fPIC
|
|||||||
# -Ofast tends to produce faster code, but may not be available for some compilers.
|
# -Ofast tends to produce faster code, but may not be available for some compilers.
|
||||||
ifdef LLAMA_FAST
|
ifdef LLAMA_FAST
|
||||||
MK_CFLAGS += -Ofast
|
MK_CFLAGS += -Ofast
|
||||||
MK_HOST_CXXFLAGS += -Ofast
|
HOST_CXXFLAGS += -Ofast
|
||||||
MK_CUDA_CXXFLAGS += -O3
|
MK_NVCCFLAGS += -O3
|
||||||
else
|
else
|
||||||
MK_CFLAGS += -O3
|
MK_CFLAGS += -O3
|
||||||
MK_CXXFLAGS += -O3
|
MK_CXXFLAGS += -O3
|
||||||
@ -220,30 +206,6 @@ MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmis
|
|||||||
-Werror=implicit-function-declaration
|
-Werror=implicit-function-declaration
|
||||||
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
|
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
|
||||||
|
|
||||||
ifeq ($(CC_IS_CLANG), 1)
|
|
||||||
# clang options
|
|
||||||
MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
|
|
||||||
MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
|
|
||||||
|
|
||||||
ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
|
|
||||||
MK_CFLAGS += -Wdouble-promotion
|
|
||||||
endif
|
|
||||||
ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
|
|
||||||
MK_CFLAGS += -Wdouble-promotion
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
# gcc options
|
|
||||||
MK_CFLAGS += -Wdouble-promotion
|
|
||||||
MK_HOST_CXXFLAGS += -Wno-array-bounds
|
|
||||||
|
|
||||||
ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
|
|
||||||
MK_HOST_CXXFLAGS += -Wno-format-truncation
|
|
||||||
endif
|
|
||||||
ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
|
|
||||||
MK_HOST_CXXFLAGS += -Wextra-semi
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# this version of Apple ld64 is buggy
|
# this version of Apple ld64 is buggy
|
||||||
ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
|
ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
|
||||||
MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
|
MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
|
||||||
@ -295,7 +257,7 @@ ifndef RISCV
|
|||||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
||||||
# Use all CPU extensions that are available:
|
# Use all CPU extensions that are available:
|
||||||
MK_CFLAGS += -march=native -mtune=native
|
MK_CFLAGS += -march=native -mtune=native
|
||||||
MK_HOST_CXXFLAGS += -march=native -mtune=native
|
HOST_CXXFLAGS += -march=native -mtune=native
|
||||||
|
|
||||||
# Usage AVX-only
|
# Usage AVX-only
|
||||||
#MK_CFLAGS += -mfma -mf16c -mavx
|
#MK_CFLAGS += -mfma -mf16c -mavx
|
||||||
@ -398,10 +360,10 @@ ifdef LLAMA_CUBLAS
|
|||||||
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
MK_NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||||
|
|
||||||
ifdef LLAMA_DEBUG
|
ifdef LLAMA_DEBUG
|
||||||
NVCCFLAGS += -lineinfo
|
MK_NVCCFLAGS += -lineinfo
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef LLAMA_CUDA_NVCC
|
ifdef LLAMA_CUDA_NVCC
|
||||||
@ -410,54 +372,52 @@ else
|
|||||||
NVCC = nvcc
|
NVCC = nvcc
|
||||||
endif #LLAMA_CUDA_NVCC
|
endif #LLAMA_CUDA_NVCC
|
||||||
ifdef CUDA_DOCKER_ARCH
|
ifdef CUDA_DOCKER_ARCH
|
||||||
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
|
MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
|
||||||
else ifdef CUDA_POWER_ARCH
|
else ifndef CUDA_POWER_ARCH
|
||||||
NVCCFLAGS +=
|
MK_NVCCFLAGS += -arch=native
|
||||||
else
|
|
||||||
NVCCFLAGS += -arch=native
|
|
||||||
endif # CUDA_DOCKER_ARCH
|
endif # CUDA_DOCKER_ARCH
|
||||||
ifdef LLAMA_CUDA_FORCE_DMMV
|
ifdef LLAMA_CUDA_FORCE_DMMV
|
||||||
NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
|
MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
|
||||||
endif # LLAMA_CUDA_FORCE_DMMV
|
endif # LLAMA_CUDA_FORCE_DMMV
|
||||||
ifdef LLAMA_CUDA_FORCE_MMQ
|
ifdef LLAMA_CUDA_FORCE_MMQ
|
||||||
NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
|
MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
|
||||||
endif # LLAMA_CUDA_FORCE_MMQ
|
endif # LLAMA_CUDA_FORCE_MMQ
|
||||||
ifdef LLAMA_CUDA_DMMV_X
|
ifdef LLAMA_CUDA_DMMV_X
|
||||||
NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
|
MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
|
||||||
endif # LLAMA_CUDA_DMMV_X
|
endif # LLAMA_CUDA_DMMV_X
|
||||||
ifdef LLAMA_CUDA_MMV_Y
|
ifdef LLAMA_CUDA_MMV_Y
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
|
MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
|
||||||
else ifdef LLAMA_CUDA_DMMV_Y
|
else ifdef LLAMA_CUDA_DMMV_Y
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
|
MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
|
MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
|
||||||
endif # LLAMA_CUDA_MMV_Y
|
endif # LLAMA_CUDA_MMV_Y
|
||||||
ifdef LLAMA_CUDA_F16
|
ifdef LLAMA_CUDA_F16
|
||||||
NVCCFLAGS += -DGGML_CUDA_F16
|
MK_NVCCFLAGS += -DGGML_CUDA_F16
|
||||||
endif # LLAMA_CUDA_F16
|
endif # LLAMA_CUDA_F16
|
||||||
ifdef LLAMA_CUDA_DMMV_F16
|
ifdef LLAMA_CUDA_DMMV_F16
|
||||||
NVCCFLAGS += -DGGML_CUDA_F16
|
MK_NVCCFLAGS += -DGGML_CUDA_F16
|
||||||
endif # LLAMA_CUDA_DMMV_F16
|
endif # LLAMA_CUDA_DMMV_F16
|
||||||
ifdef LLAMA_CUDA_KQUANTS_ITER
|
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
||||||
NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
|
MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
|
MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
|
||||||
endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
||||||
#ifdef LLAMA_CUDA_CUBLAS
|
#ifdef LLAMA_CUDA_CUBLAS
|
||||||
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
|
# MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS
|
||||||
#endif # LLAMA_CUDA_CUBLAS
|
#endif # LLAMA_CUDA_CUBLAS
|
||||||
ifdef LLAMA_CUDA_CCBIN
|
ifdef LLAMA_CUDA_CCBIN
|
||||||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||||
endif
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) -c $< -o $@
|
$(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||||
endif # LLAMA_CUBLAS
|
endif # LLAMA_CUBLAS
|
||||||
|
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
@ -519,16 +479,22 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
|
|||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
endif # LLAMA_MPI
|
endif # LLAMA_MPI
|
||||||
|
|
||||||
|
GF_CC := $(CC)
|
||||||
|
include scripts/get-flags.mk
|
||||||
|
|
||||||
# combine build flags with cmdline overrides
|
# combine build flags with cmdline overrides
|
||||||
override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS)
|
override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
|
||||||
override CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
|
BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
|
||||||
override CUDA_CXXFLAGS := $(MK_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS)
|
override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS)
|
||||||
override HOST_CXXFLAGS := $(MK_HOST_CXXFLAGS) $(HOST_CXXFLAGS)
|
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
|
||||||
override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
|
override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
# save CXXFLAGS before we add host-only options
|
# identify CUDA host compiler
|
||||||
NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)"
|
ifdef LLAMA_CUBLAS
|
||||||
override CXXFLAGS += $(HOST_CXXFLAGS)
|
GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
|
||||||
|
include scripts/get-flags.mk
|
||||||
|
CUDA_CXXFLAGS := $(GF_CXXFLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# Print build information
|
# Print build information
|
||||||
|
38
scripts/get-flags.mk
Normal file
38
scripts/get-flags.mk
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
|
||||||
|
GF_CC_IS_GCC = 1
|
||||||
|
GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
||||||
|
else
|
||||||
|
GF_CC_IS_CLANG = 1
|
||||||
|
ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
|
||||||
|
GF_CC_IS_LLVM_CLANG = 1
|
||||||
|
else
|
||||||
|
GF_CC_IS_APPLE_CLANG = 1
|
||||||
|
endif
|
||||||
|
GF_CC_VER := \
|
||||||
|
$(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
|
||||||
|
| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(GF_CC_IS_CLANG), 1)
|
||||||
|
# clang options
|
||||||
|
GF_CFLAGS = -Wunreachable-code-break -Wunreachable-code-return
|
||||||
|
GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
|
||||||
|
|
||||||
|
ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
|
||||||
|
GF_CFLAGS += -Wdouble-promotion
|
||||||
|
endif
|
||||||
|
ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
|
||||||
|
GF_CFLAGS += -Wdouble-promotion
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
# gcc options
|
||||||
|
GF_CFLAGS = -Wdouble-promotion
|
||||||
|
GF_CXXFLAGS = -Wno-array-bounds
|
||||||
|
|
||||||
|
ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
|
||||||
|
GF_CXXFLAGS += -Wno-format-truncation
|
||||||
|
endif
|
||||||
|
ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
|
||||||
|
GF_CXXFLAGS += -Wextra-semi
|
||||||
|
endif
|
||||||
|
endif
|
Loading…
Reference in New Issue
Block a user