llama.cpp/Makefile

# Host detection. Each UNAME_* variable is probed with uname only when it
# does not already have a non-empty value, so it can be overridden from the
# make command line or the environment.
# UNAME_S: kernel/OS name (uname -s)
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

# UNAME_P: processor type (uname -p)
ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

# UNAME_M: machine hardware name (uname -m)
ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

# First line of each compiler's --version output, captured once at parse time.
CCV := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)

# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
#
# When the OS is Darwin and uname -p is not "arm", ask sysctl whether the
# hardware supports arm64. If it does, only a warning is emitted: the
# UNAME_P / UNAME_M overrides below are commented out, so the detected values
# stay as they are.
ifeq ($(UNAME_S),Darwin)
  ifneq ($(UNAME_P),arm)
    # stderr is discarded so a missing sysctl key yields an empty value
    SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
    ifeq ($(SYSCTL_M),1)
      # UNAME_P := arm
      # UNAME_M := arm64
      warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
    endif
  endif
endif

#
# Compile flags
#

# keep standard at C11 and C++11
# Baseline flags for both compilers: -O3, NDEBUG defined, position-independent
# code, and the current directory on the include path (C++ additionally gets
# ./examples). LDFLAGS starts empty and is extended by the sections below.
CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
LDFLAGS  =

# warnings
# The C build enables a wider warning set than the C++ build; both suppress
# unused-function warnings via -Wno-unused-function.
CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

# OS specific
# TODO: support Windows
#
# Every OS named in the list below gets -pthread appended to both the C and
# the C++ compiler flags; any other UNAME_S value leaves the flags untouched.
ifneq ($(filter Linux Darwin FreeBSD NetBSD OpenBSD Haiku,$(UNAME_S)),)
  CFLAGS   += -pthread
  CXXFLAGS += -pthread
endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue
#
# x86 hosts. The test is "UNAME_M is one of the listed names", written with
# the same ifneq/$(filter) idiom as the other architecture checks in this
# file, so an empty UNAME_M no longer matches by accident. amd64 is the name
# uname -m reports for x86_64 on the BSDs.
ifneq ($(filter x86_64 i686 amd64,$(UNAME_M)),)
  # Use all CPU extensions that are available:
  CFLAGS   += -march=native -mtune=native
  CXXFLAGS += -march=native -mtune=native
endif
# PowerPC 64-bit hosts (any UNAME_M starting with ppc64).
ifneq ($(filter ppc64%,$(UNAME_M)),)
  # Look for POWER9 in /proc/cpuinfo. stderr is discarded so that hosts
  # without that file just get an empty result instead of a grep error on
  # every make invocation.
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo 2>/dev/null)
  ifneq (,$(findstring POWER9,$(POWER9_M)))
    CFLAGS   += -mcpu=power9
    CXXFLAGS += -mcpu=power9
  endif
  # Require c++23's std::byteswap for big-endian support.
  # Applied only when UNAME_M is exactly ppc64; this -std appears after the
  # earlier -std=c++11 on the command line.
  ifeq ($(UNAME_M),ppc64)
    CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
  endif
endif
# Mac M1 - include Accelerate framework.
# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
# Enabled on Darwin unless LLAMA_NO_ACCELERATE is set to a non-empty value.
ifeq ($(UNAME_S),Darwin)
  ifndef LLAMA_NO_ACCELERATE
    CFLAGS  += -DGGML_USE_ACCELERATE
    LDFLAGS += -framework Accelerate
  endif
endif
# Opt-in: setting LLAMA_OPENBLAS defines GGML_USE_OPENBLAS for the C build,
# adds the OpenBLAS include directory and links against libopenblas.
ifdef LLAMA_OPENBLAS
  CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
  LDFLAGS += -lopenblas
endif

# Opt-in: setting LLAMA_GPROF adds -pg to both compilers' flags.
ifdef LLAMA_GPROF
  CFLAGS   += -pg
  CXXFLAGS += -pg
endif
# 64-bit ARM (UNAME_M starting with aarch64): tune for the host CPU.
ifneq ($(filter aarch64%,$(UNAME_M)),)
  CFLAGS   += -mcpu=native
  CXXFLAGS += -mcpu=native
endif

# 32-bit ARM variants below only extend CFLAGS; CXXFLAGS is left unchanged.

# Raspberry Pi 1, 2, 3
ifneq ($(filter armv6%,$(UNAME_M)),)
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

# Raspberry Pi 4
ifneq ($(filter armv7%,$(UNAME_M)),)
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

# Raspberry Pi 4
ifneq ($(filter armv8%,$(UNAME_M)),)
  CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

#
# Print build information
#

# Printed at parse time on every make invocation: the detected host values,
# the final flag sets, and the first --version line of each compiler,
# followed by an empty line.
$(info I llama.cpp build info: )
$(info I UNAME_S:  $(UNAME_S))
$(info I UNAME_P:  $(UNAME_P))
$(info I UNAME_M:  $(UNAME_M))
$(info I CFLAGS:   $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS:  $(LDFLAGS))
$(info I CC:       $(CCV))
$(info I CXX:      $(CXXV))
$(info )

# Default goal: builds the main, quantize, perplexity and embedding
# executables. Declared phony so that a file named "default" in the build
# directory cannot make this goal look up to date.
.PHONY: default
default: main quantize perplexity embedding

#
# Build library
#

# Object files shared by the executables below. In each recipe $< is the
# first prerequisite (the source file) and $@ is the object being built.

# ggml: compiled as C
ggml.o: ggml.c ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

# llama: compiled as C++
llama.o: llama.cpp llama.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# common helpers from the examples directory, compiled as C++
common.o: examples/common.cpp examples/common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Remove every artifact this Makefile can produce: object files, the example
# executables and the libllama.so shared library. Declared phony so that a
# file named "clean" cannot prevent the recipe from running.
.PHONY: clean
clean:
	rm -vf *.o main quantize quantize-stats perplexity embedding libllama.so

# Build the main executable from its example source plus the ggml, llama and
# common objects ($^ expands to all prerequisites in the order listed), then
# print a short usage hint.
main: examples/main/main.cpp ggml.o llama.o common.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
	@echo
	@echo '====  Run ./main -h for help.  ===='
	@echo

# Build the quantize executable; $^ is the source plus the ggml and llama objects.
quantize: examples/quantize/quantize.cpp ggml.o llama.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

# Build the quantize-stats executable; $^ is the source plus the ggml and llama objects.
# Not part of the default goal, so it must be requested explicitly.
quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

# Build the perplexity executable; $^ is the source plus the ggml, llama and common objects.
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

# Build the embedding executable; $^ is the source plus the ggml, llama and common objects.
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

# Link the llama and ggml objects into a shared library ($^ keeps the
# llama.o ggml.o order). Not part of the default goal.
libllama.so: llama.o ggml.o
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
#
# Tests
#

# Run the test script with bash. The target is phony, so the recipe runs on
# every invocation; it declares no prerequisites, so nothing is built first.
.PHONY: tests
tests:
	bash ./tests/run-tests.sh
Initial release 2023-03-10 19:40:58 +01:00			`ifndef UNAME_S`
			`UNAME_S := $(shell uname -s)`
			`endif`

			`ifndef UNAME_P`
			`UNAME_P := $(shell uname -p)`
			`endif`

			`ifndef UNAME_M`
			`UNAME_M := $(shell uname -m)`
			`endif`

			`CCV := $(shell $(CC) --version \| head -n 1)`
			`CXXV := $(shell $(CXX) --version \| head -n 1)`

			`# Mac OS + Arm can report x86_64`
			`# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789`
			`ifeq ($(UNAME_S),Darwin)`
			`ifneq ($(UNAME_P),arm)`
Makefile: slightly cleanup for Mac Intel; echo instead of run ./main -h (#335) 2023-03-21 16:44:11 +01:00			`SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)`
Initial release 2023-03-10 19:40:58 +01:00			`ifeq ($(SYSCTL_M),1)`
			`# UNAME_P := arm`
			`# UNAME_M := arm64`
			`warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)`
			`endif`
			`endif`
			`endif`

			`#`
			`# Compile flags`
			`#`

Add tokenizer test + revert to C++11 (#355) * Add test-tokenizer-0 to do a few tokenizations - feel free to expand * Added option to convert-pth-to-ggml.py script to dump just the vocabulary * Added ./models/ggml-vocab.bin containing just LLaMA vocab data (used for tests) * Added utility to load vocabulary file from previous point (temporary implementation) * Avoid using std::string_view and drop back to C++11 (hope I didn't break something) * Rename gpt_vocab -> llama_vocab * All CMake binaries go into ./bin/ now 2023-03-21 16:29:41 +01:00			`# keep standard at C11 and C++11`
Initial release 2023-03-10 19:40:58 +01:00			`CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC`
Add tokenizer test + revert to C++11 (#355) * Add test-tokenizer-0 to do a few tokenizations - feel free to expand * Added option to convert-pth-to-ggml.py script to dump just the vocabulary * Added ./models/ggml-vocab.bin containing just LLaMA vocab data (used for tests) * Added utility to load vocabulary file from previous point (temporary implementation) * Avoid using std::string_view and drop back to C++11 (hope I didn't break something) * Rename gpt_vocab -> llama_vocab * All CMake binaries go into ./bin/ now 2023-03-21 16:29:41 +01:00			`CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC`
Initial release 2023-03-10 19:40:58 +01:00			`LDFLAGS =`

all : be more strict about converting float to double (#458) * Be more strict about converting float to double * Test equivalence of round, SILU implementations Test module is commented out in CMakeLists.txt because the tests may take a long time, depending on how much the compiler optimizes. * Fix softmax in perplexity.cpp * all : prefer float over double where appropriate * perplexity : add <cmath> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2023-03-28 18:48:20 +02:00			`# warnings`
			`CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function`
			`CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function`

Initial release 2023-03-10 19:40:58 +01:00			`# OS specific`
			`# TODO: support Windows`
			`ifeq ($(UNAME_S),Linux)`
			`CFLAGS += -pthread`
			`CXXFLAGS += -pthread`
			`endif`
			`ifeq ($(UNAME_S),Darwin)`
			`CFLAGS += -pthread`
			`CXXFLAGS += -pthread`
			`endif`
			`ifeq ($(UNAME_S),FreeBSD)`
			`CFLAGS += -pthread`
			`CXXFLAGS += -pthread`
			`endif`
Add NetBSD support. (#90) 2023-03-13 17:40:54 +01:00			`ifeq ($(UNAME_S),NetBSD)`
			`CFLAGS += -pthread`
			`CXXFLAGS += -pthread`
			`endif`
Add OpenBSD support (#314) 2023-03-21 16:50:09 +01:00			`ifeq ($(UNAME_S),OpenBSD)`
			`CFLAGS += -pthread`
			`CXXFLAGS += -pthread`
			`endif`
Initial release 2023-03-10 19:40:58 +01:00			`ifeq ($(UNAME_S),Haiku)`
			`CFLAGS += -pthread`
			`CXXFLAGS += -pthread`
			`endif`

			`# Architecture specific`
			`# TODO: probably these flags need to be tweaked on some architectures`
			`# feel free to update the Makefile for your architecture and send a pull request or issue`
			`ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))`
make : use -march=native -mtune=native on x86 (#609) 2023-04-02 09:17:05 +02:00			`# Use all CPU extensions that are available:`
			`CFLAGS += -march=native -mtune=native`
make : missing host optimizations in CXXFLAGS (#763) 2023-04-05 16:38:37 +02:00			`CXXFLAGS += -march=native -mtune=native`
Initial release 2023-03-10 19:40:58 +01:00			`endif`
			`ifneq ($(filter ppc64%,$(UNAME_M)),)`
			`POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)`
			`ifneq (,$(findstring POWER9,$(POWER9_M)))`
additional optimizations for POWER9 (#454) 2023-03-24 16:19:26 +01:00			`CFLAGS += -mcpu=power9`
			`CXXFLAGS += -mcpu=power9`
Initial release 2023-03-10 19:40:58 +01:00			`endif`
			`# Require c++23's std::byteswap for big-endian support.`
			`ifeq ($(UNAME_M),ppc64)`
			`CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN`
			`endif`
			`endif`
Update Makefile var + add comment 2023-03-11 11:26:16 +01:00			`ifndef LLAMA_NO_ACCELERATE`
Makefile: slightly cleanup for Mac Intel; echo instead of run ./main -h (#335) 2023-03-21 16:44:11 +01:00			`# Mac M1 - include Accelerate framework.`
			# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
Initial release 2023-03-10 19:40:58 +01:00			`ifeq ($(UNAME_S),Darwin)`
			`CFLAGS += -DGGML_USE_ACCELERATE`
			`LDFLAGS += -framework Accelerate`
			`endif`
			`endif`
Update Makefile var + add comment 2023-03-11 11:26:16 +01:00			`ifdef LLAMA_OPENBLAS`
Initial release 2023-03-10 19:40:58 +01:00			`CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas`
			`LDFLAGS += -lopenblas`
			`endif`
Update Makefile var + add comment 2023-03-11 11:26:16 +01:00			`ifdef LLAMA_GPROF`
Initial release 2023-03-10 19:40:58 +01:00			`CFLAGS += -pg`
			`CXXFLAGS += -pg`
			`endif`
			`ifneq ($(filter aarch64%,$(UNAME_M)),)`
			`CFLAGS += -mcpu=native`
			`CXXFLAGS += -mcpu=native`
			`endif`
			`ifneq ($(filter armv6%,$(UNAME_M)),)`
			`# Raspberry Pi 1, 2, 3`
			`CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access`
			`endif`
			`ifneq ($(filter armv7%,$(UNAME_M)),)`
			`# Raspberry Pi 4`
			`CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations`
			`endif`
			`ifneq ($(filter armv8%,$(UNAME_M)),)`
			`# Raspberry Pi 4`
			`CFLAGS += -mfp16-format=ieee -mno-unaligned-access`
			`endif`

			`#`
			`# Print build information`
			`#`

			`$(info I llama.cpp build info: )`
			`$(info I UNAME_S: $(UNAME_S))`
			`$(info I UNAME_P: $(UNAME_P))`
			`$(info I UNAME_M: $(UNAME_M))`
			`$(info I CFLAGS: $(CFLAGS))`
			`$(info I CXXFLAGS: $(CXXFLAGS))`
			`$(info I LDFLAGS: $(LDFLAGS))`
			`$(info I CC: $(CCV))`
			`$(info I CXX: $(CXXV))`
			`$(info )`

Add embedding example to Makefile (#540) 2023-03-28 08:11:09 +02:00			`default: main quantize perplexity embedding`
Initial release 2023-03-10 19:40:58 +01:00
			`#`
			`# Build library`
			`#`

			`ggml.o: ggml.c ggml.h`
			`$(CC) $(CFLAGS) -c ggml.c -o ggml.o`

Introduce C-style API (#370) * Major refactoring - introduce C-style API * Clean up * Add <cassert> * Add <iterator> * Add <algorithm> .... * Fix timing reporting and accumulation * Measure eval time only for single-token calls * Change llama_tokenize return meaning 2023-03-22 06:32:36 +01:00			`llama.o: llama.cpp llama.h`
			`$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o`

Overhaul the examples structure - main -> examples - utils -> examples (renamed to "common") - quantize -> examples - separate tools for "perplexity" and "embedding" Hope I didn't break something ! 2023-03-25 19:26:40 +01:00			`common.o: examples/common.cpp examples/common.h`
			`$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o`
Initial release 2023-03-10 19:40:58 +01:00
			`clean:`
Add quantize-stats command for testing quantization (#728) Command that calculates some statistics over the errors introduced by quantization, like mean square error, max error and some percentile errors for layer weights. Should be useful for testing quantization improvements. Exposes some internal state from ggml and llama for testing 2023-04-08 00:09:18 +02:00			`rm -vf *.o main quantize quantize-stats perplexity embedding`
Initial release 2023-03-10 19:40:58 +01:00
Overhaul the examples structure - main -> examples - utils -> examples (renamed to "common") - quantize -> examples - separate tools for "perplexity" and "embedding" Hope I didn't break something ! 2023-03-25 19:26:40 +01:00			`main: examples/main/main.cpp ggml.o llama.o common.o`
			`$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)`
Fix Makefile echo escape codes (by removing them). (#418) 2023-03-23 12:41:32 +01:00			`@echo`
			`@echo '==== Run ./main -h for help. ===='`
			`@echo`
Initial release 2023-03-10 19:40:58 +01:00
Overhaul the examples structure - main -> examples - utils -> examples (renamed to "common") - quantize -> examples - separate tools for "perplexity" and "embedding" Hope I didn't break something ! 2023-03-25 19:26:40 +01:00			`quantize: examples/quantize/quantize.cpp ggml.o llama.o`
			`$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)`

Add quantize-stats command for testing quantization (#728) Command that calculates some statistics over the errors introduced by quantization, like mean square error, max error and some percentile errors for layer weights. Should be useful for testing quantization improvements. Exposes some internal state from ggml and llama for testing 2023-04-08 00:09:18 +02:00			`quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o`
			`$(CXX) $(CXXFLAGS) examples/quantize-stats/quantize-stats.cpp ggml.o llama.o -o quantize-stats $(LDFLAGS)`

Overhaul the examples structure - main -> examples - utils -> examples (renamed to "common") - quantize -> examples - separate tools for "perplexity" and "embedding" Hope I didn't break something ! 2023-03-25 19:26:40 +01:00			`perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o`
			`$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)`
Initial release 2023-03-10 19:40:58 +01:00
Add embedding example to Makefile (#540) 2023-03-28 08:11:09 +02:00			`embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o`
			`$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)`

make : add libllama.so target for llama-cpp-python (#797) I was able to get llama-cpp-python working but only when I build libllama.so with make. 2023-04-07 18:11:58 +02:00			`libllama.so: llama.o ggml.o`
			`$(CXX) $(CXXFLAGS) -shared -fPIC -o libllama.so llama.o ggml.o $(LDFLAGS)`
Initial release 2023-03-10 19:40:58 +01:00			`#`
			`# Tests`
			`#`

			`.PHONY: tests`
			`tests:`
			`bash ./tests/run-tests.sh`