2023-03-10 20:40:58 +02:00
# Probe the host platform once, at parse time. Each UNAME_* variable is only
# queried when it does not already carry a value, so a caller may preset it.
ifndef UNAME_S
  UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
  UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
  UNAME_M := $(shell uname -m)
endif

# First line of each compiler's --version output.
CCV  := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
  ifneq ($(UNAME_P),arm)
    # Query sysctl for hw.optional.arm64; stderr is discarded, so a failing
    # query simply leaves SYSCTL_M empty.
    SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
    ifeq (1,$(SYSCTL_M))
      # The UNAME_P / UNAME_M overrides below stay disabled; only a warning
      # is emitted.
      # UNAME_P := arm
      # UNAME_M := arm64
      warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
    endif
  endif
endif
#
# Compile flags
#

# Common baseline: include path, optimisation level, assertions disabled.
CFLAGS    = -I. -O3 -DNDEBUG
CXXFLAGS  = -I. -I./examples -O3 -DNDEBUG

# keep standard at C11 and C++11
CFLAGS   += -std=c11 -fPIC
CXXFLAGS += -std=c++11 -fPIC

# Linker flags start out empty; later sections append to them.
LDFLAGS   =
# OS specific
# TODO: support Windows
#
# Every OS handled here receives the same treatment: compile C and C++ with
# -pthread. Any other UNAME_S value leaves the flags untouched.
ifneq ($(filter Linux Darwin FreeBSD NetBSD OpenBSD Haiku,$(UNAME_S)),)
  CFLAGS   += -pthread
  CXXFLAGS += -pthread
endif
# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue
#
# x86 (x86_64 / i686): append SIMD-related options to CFLAGS according to
# what the build host reports. The probing method depends on the OS; on an
# OS without a dedicated branch the flags are added without probing.
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
  ifeq ($(UNAME_S),Darwin)
    # F16C is enabled unconditionally on Darwin.
    CFLAGS += -mf16c
    AVX1_M := $(shell sysctl machdep.cpu.features)
    ifneq (,$(findstring FMA,$(AVX1_M)))
      CFLAGS += -mfma
    endif
    ifneq (,$(findstring AVX1.0,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
    ifneq (,$(findstring AVX2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
  else ifeq ($(UNAME_S),Linux)
    # Each feature is looked up in /proc/cpuinfo. The grep patterns end in a
    # space, so e.g. "avx " cannot match inside "avx2".
    AVX1_M := $(shell grep "avx " /proc/cpuinfo)
    ifneq (,$(findstring avx,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
    ifneq (,$(findstring avx2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
    FMA_M := $(shell grep "fma " /proc/cpuinfo)
    ifneq (,$(findstring fma,$(FMA_M)))
      CFLAGS += -mfma
    endif
    F16C_M := $(shell grep "f16c " /proc/cpuinfo)
    ifneq (,$(findstring f16c,$(F16C_M)))
      CFLAGS += -mf16c
    endif
    SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
    ifneq (,$(findstring sse3,$(SSE3_M)))
      CFLAGS += -msse3
    endif
    AVX512F_M := $(shell grep "avx512f " /proc/cpuinfo)
    ifneq (,$(findstring avx512f,$(AVX512F_M)))
      CFLAGS += -mavx512f
    endif
    AVX512BW_M := $(shell grep "avx512bw " /proc/cpuinfo)
    ifneq (,$(findstring avx512bw,$(AVX512BW_M)))
      CFLAGS += -mavx512bw
    endif
    AVX512DQ_M := $(shell grep "avx512dq " /proc/cpuinfo)
    ifneq (,$(findstring avx512dq,$(AVX512DQ_M)))
      CFLAGS += -mavx512dq
    endif
    AVX512VL_M := $(shell grep "avx512vl " /proc/cpuinfo)
    ifneq (,$(findstring avx512vl,$(AVX512VL_M)))
      CFLAGS += -mavx512vl
    endif
    AVX512CD_M := $(shell grep "avx512cd " /proc/cpuinfo)
    ifneq (,$(findstring avx512cd,$(AVX512CD_M)))
      CFLAGS += -mavx512cd
    endif
    AVX512ER_M := $(shell grep "avx512er " /proc/cpuinfo)
    ifneq (,$(findstring avx512er,$(AVX512ER_M)))
      CFLAGS += -mavx512er
    endif
    AVX512IFMA_M := $(shell grep "avx512ifma " /proc/cpuinfo)
    ifneq (,$(findstring avx512ifma,$(AVX512IFMA_M)))
      CFLAGS += -mavx512ifma
    endif
    AVX512PF_M := $(shell grep "avx512pf " /proc/cpuinfo)
    ifneq (,$(findstring avx512pf,$(AVX512PF_M)))
      CFLAGS += -mavx512pf
    endif
  else ifeq ($(UNAME_S),Haiku)
    # The grep patterns are upper case, so the findstring needles must be
    # upper case as well: findstring is case-sensitive, and lower-case
    # needles would never match the text selected by grep.
    AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
    ifneq (,$(findstring AVX,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
    ifneq (,$(findstring AVX2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
    FMA_M := $(shell sysinfo -cpu | grep "FMA ")
    ifneq (,$(findstring FMA,$(FMA_M)))
      CFLAGS += -mfma
    endif
    F16C_M := $(shell sysinfo -cpu | grep "F16C ")
    ifneq (,$(findstring F16C,$(F16C_M)))
      CFLAGS += -mf16c
    endif
  else
    # No probe for this OS: FMA, F16C, AVX and AVX2 are enabled as-is.
    CFLAGS += -mfma -mf16c -mavx -mavx2
  endif
endif
# PowerPC 64 (any UNAME_M starting with "ppc64").
ifneq (,$(filter ppc64%,$(UNAME_M)))
  # Enable the POWER9 vector option when /proc/cpuinfo mentions POWER9.
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
  ifneq ($(findstring POWER9,$(POWER9_M)),)
    CFLAGS += -mpower9-vector
  endif

  # Require c++23's std::byteswap for big-endian support.
  # Applies only when UNAME_M is exactly "ppc64".
  ifeq (ppc64,$(UNAME_M))
    CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
  endif
endif
2023-03-11 12:26:16 +02:00
# Mac M1 - include Accelerate framework.
# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
# Giving LLAMA_NO_ACCELERATE a non-empty value opts out.
ifeq ($(UNAME_S),Darwin)
  ifndef LLAMA_NO_ACCELERATE
    LDFLAGS += -framework Accelerate
    CFLAGS  += -DGGML_USE_ACCELERATE
  endif
endif
2023-03-11 12:26:16 +02:00
# Opt-in OpenBLAS support: active when LLAMA_OPENBLAS has a non-empty value.
# Adds the OpenBLAS define and include directory, and links the library.
ifdef LLAMA_OPENBLAS
  LDFLAGS += -lopenblas
  CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
endif
2023-03-11 12:26:16 +02:00
# Opt-in profiling build: active when LLAMA_GPROF has a non-empty value.
# Passes -pg to both the C and the C++ compiler.
ifdef LLAMA_GPROF
  CXXFLAGS += -pg
  CFLAGS   += -pg
endif
# 64-bit ARM (any UNAME_M starting with "aarch64"): tune for the build host.
ifneq (,$(filter aarch64%,$(UNAME_M)))
  CXXFLAGS += -mcpu=native
  CFLAGS   += -mcpu=native
endif
# 32-bit ARM variants, selected by the UNAME_M prefix. Only CFLAGS is extended.

# Raspberry Pi 1, 2, 3
ifneq (,$(filter armv6%,$(UNAME_M)))
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

# Raspberry Pi 4
ifneq (,$(filter armv7%,$(UNAME_M)))
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

# Raspberry Pi 4
ifneq (,$(filter armv8%,$(UNAME_M)))
  CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif
#
# Print build information
#

# Printed at parse time, one "I <name> : <value>" line per item, followed by
# an empty line.
$(info I llama.cpp build info : )
$(foreach build_var,UNAME_S UNAME_P UNAME_M CFLAGS CXXFLAGS LDFLAGS,$(info I $(build_var) : $($(build_var))))
# The compiler entries show the captured --version lines (CCV / CXXV).
$(info I CC : $(CCV))
$(info I CXX : $(CXXV))
$(info )
# Builds both executables. This is the first rule visible here, so it is
# presumably make's default goal. Declared phony because it names no file:
# a stray file called "default" must not make the goal look up to date.
.PHONY: default
default: main quantize
#
# Build library
#

# C object: compiled with $(CC); rebuilt when ggml.c or ggml.h changes.
ggml.o: ggml.c ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

# C++ object: compiled with $(CXX); rebuilt when utils.cpp or utils.h changes.
utils.o: utils.cpp utils.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
# Remove object files and the two executables. Declared phony so that a file
# named "clean" in the source tree cannot turn this target into a no-op.
.PHONY: clean
clean:
	rm -f *.o main quantize
# Link the main executable from main.cpp and the two objects, then print a
# coloured usage hint. printf with octal escapes is used for the hint because
# echo's handling of "\x1b" differs between shells and may print it literally.
main: main.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
	@printf '\033[36mrun ./main -h for help\033[0m\n'
2023-03-10 20:40:58 +02:00
# Link the quantize executable from quantize.cpp and the two objects.
quantize: quantize.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
#
# Tests
#

# Runs the test script through bash. The target is phony, so the script is
# executed on every invocation.
tests:
	bash ./tests/run-tests.sh

.PHONY: tests