2023-03-10 19:40:58 +01:00
i f n d e f U N A M E _ S
UNAME_S := $( shell uname -s)
e n d i f
i f n d e f U N A M E _ P
UNAME_P := $( shell uname -p)
e n d i f
i f n d e f U N A M E _ M
UNAME_M := $( shell uname -m)
e n d i f
CCV := $( shell $( CC) --version | head -n 1)
CXXV := $( shell $( CXX) --version | head -n 1)
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
i f e q ( $( UNAME_S ) , D a r w i n )
ifneq ( $( UNAME_P) ,arm)
SYSCTL_M := $( shell sysctl -n hw.optional.arm64)
ifeq ( $( SYSCTL_M) ,1)
# UNAME_P := arm
# UNAME_M := arm64
warn := $( warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\# issuecomment-1282546789)
endif
endif
e n d i f
#
# Compile flags
#
2023-03-28 19:36:25 +02:00
CFLAGS = -I. -O3 -DNDEBUG -std= c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std= c++11 -fPIC
2023-03-10 19:40:58 +01:00
LDFLAGS =
# OS specific
# TODO: support Windows
i f e q ( $( UNAME_S ) , L i n u x )
CFLAGS += -pthread
CXXFLAGS += -pthread
e n d i f
i f e q ( $( UNAME_S ) , D a r w i n )
CFLAGS += -pthread
CXXFLAGS += -pthread
e n d i f
i f e q ( $( UNAME_S ) , F r e e B S D )
CFLAGS += -pthread
CXXFLAGS += -pthread
e n d i f
2023-03-13 17:40:54 +01:00
i f e q ( $( UNAME_S ) , N e t B S D )
CFLAGS += -pthread
CXXFLAGS += -pthread
e n d i f
2023-03-10 19:40:58 +01:00
i f e q ( $( UNAME_S ) , H a i k u )
CFLAGS += -pthread
CXXFLAGS += -pthread
e n d i f
# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
i f e q ( $( UNAME_M ) , $( filter $ ( UNAME_M ) ,x 86_ 64 i 686) )
ifeq ( $( UNAME_S) ,Darwin)
CFLAGS += -mf16c
AVX1_M := $( shell sysctl machdep.cpu.features)
ifneq ( ,$( findstring FMA,$( AVX1_M) ) )
CFLAGS += -mfma
endif
ifneq ( ,$( findstring AVX1.0,$( AVX1_M) ) )
CFLAGS += -mavx
endif
AVX2_M := $( shell sysctl machdep.cpu.leaf7_features)
ifneq ( ,$( findstring AVX2,$( AVX2_M) ) )
CFLAGS += -mavx2
endif
else ifeq ( $( UNAME_S) ,Linux)
AVX1_M := $( shell grep "avx " /proc/cpuinfo)
ifneq ( ,$( findstring avx,$( AVX1_M) ) )
CFLAGS += -mavx
endif
AVX2_M := $( shell grep "avx2 " /proc/cpuinfo)
ifneq ( ,$( findstring avx2,$( AVX2_M) ) )
CFLAGS += -mavx2
endif
FMA_M := $( shell grep "fma " /proc/cpuinfo)
ifneq ( ,$( findstring fma,$( FMA_M) ) )
CFLAGS += -mfma
endif
F16C_M := $( shell grep "f16c " /proc/cpuinfo)
ifneq ( ,$( findstring f16c,$( F16C_M) ) )
CFLAGS += -mf16c
endif
SSE3_M := $( shell grep "sse3 " /proc/cpuinfo)
ifneq ( ,$( findstring sse3,$( SSE3_M) ) )
CFLAGS += -msse3
endif
else ifeq ( $( UNAME_S) ,Haiku)
AVX1_M := $( shell sysinfo -cpu | grep "AVX " )
ifneq ( ,$( findstring avx,$( AVX1_M) ) )
CFLAGS += -mavx
endif
AVX2_M := $( shell sysinfo -cpu | grep "AVX2 " )
ifneq ( ,$( findstring avx2,$( AVX2_M) ) )
CFLAGS += -mavx2
endif
FMA_M := $( shell sysinfo -cpu | grep "FMA " )
ifneq ( ,$( findstring fma,$( FMA_M) ) )
CFLAGS += -mfma
endif
F16C_M := $( shell sysinfo -cpu | grep "F16C " )
ifneq ( ,$( findstring f16c,$( F16C_M) ) )
CFLAGS += -mf16c
endif
else
CFLAGS += -mfma -mf16c -mavx -mavx2
endif
e n d i f
i f e q ( $( UNAME_M ) , a m d 6 4 )
CFLAGS += -mavx -mavx2 -mfma -mf16c
e n d i f
i f n e q ( $( filter ppc 64%,$ ( UNAME_M ) ) , )
POWER9_M := $( shell grep "POWER9" /proc/cpuinfo)
ifneq ( ,$( findstring POWER9,$( POWER9_M) ) )
CFLAGS += -mpower9-vector
endif
# Require c++23's std::byteswap for big-endian support.
ifeq ( $( UNAME_M) ,ppc64)
CXXFLAGS += -std= c++23 -DGGML_BIG_ENDIAN
endif
e n d i f
2023-03-11 11:26:16 +01:00
i f n d e f L L A M A _ N O _ A C C E L E R A T E
2023-03-10 19:40:58 +01:00
# Mac M1 - include Accelerate framework
ifeq ( $( UNAME_S) ,Darwin)
CFLAGS += -DGGML_USE_ACCELERATE
LDFLAGS += -framework Accelerate
endif
e n d i f
2023-03-11 11:26:16 +01:00
i f d e f L L A M A _ O P E N B L A S
2023-03-10 19:40:58 +01:00
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas
e n d i f
2023-03-11 11:26:16 +01:00
i f d e f L L A M A _ G P R O F
2023-03-10 19:40:58 +01:00
CFLAGS += -pg
CXXFLAGS += -pg
e n d i f
i f n e q ( $( filter aarch 64%,$ ( UNAME_M ) ) , )
CFLAGS += -mcpu= native
CXXFLAGS += -mcpu= native
e n d i f
i f n e q ( $( filter armv 6%,$ ( UNAME_M ) ) , )
# Raspberry Pi 1, 2, 3
CFLAGS += -mfpu= neon-fp-armv8 -mfp16-format= ieee -mno-unaligned-access
e n d i f
i f n e q ( $( filter armv 7%,$ ( UNAME_M ) ) , )
# Raspberry Pi 4
CFLAGS += -mfpu= neon-fp-armv8 -mfp16-format= ieee -mno-unaligned-access -funsafe-math-optimizations
e n d i f
i f n e q ( $( filter armv 8%,$ ( UNAME_M ) ) , )
# Raspberry Pi 4
CFLAGS += -mfp16-format= ieee -mno-unaligned-access
e n d i f
#
# Print build information
#
$(info I llama.cpp build info : )
$(info I UNAME_S : $( UNAME_S ) )
$(info I UNAME_P : $( UNAME_P ) )
$(info I UNAME_M : $( UNAME_M ) )
$(info I CFLAGS : $( CFLAGS ) )
$(info I CXXFLAGS : $( CXXFLAGS ) )
$(info I LDFLAGS : $( LDFLAGS ) )
$(info I CC : $( CCV ) )
$(info I CXX : $( CXXV ) )
$( info )
default : main quantize
#
# Build library
#
ggml.o : ggml .c ggml .h
$( CC) $( CFLAGS) -c ggml.c -o ggml.o
2023-03-28 18:27:41 +02:00
mmap.o : mmap .c mmap .h
$( CC) $( CFLAGS) -c mmap.c -o mmap.o
2023-03-10 19:40:58 +01:00
utils.o : utils .cpp utils .h
$( CXX) $( CXXFLAGS) -c utils.cpp -o utils.o
clean :
rm -f *.o main quantize
2023-03-28 18:27:41 +02:00
main : main .cpp ggml .o utils .o mmap .o
$( CXX) $( CXXFLAGS) main.cpp ggml.o utils.o mmap.o -o main $( LDFLAGS)
2023-03-10 19:40:58 +01:00
./main -h
quantize : quantize .cpp ggml .o utils .o
$( CXX) $( CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $( LDFLAGS)
#
# Tests
#
.PHONY : tests
tests :
bash ./tests/run-tests.sh