mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 06:39:25 +01:00
60ce97c9d8
add intel amx isa detection add vnni kernel for gemv cases add vnni and amx kernel support for block_q8_0 code cleanup fix packing B issue enable openmp fine tune amx kernel switch to aten parallel pattern add error message for nested parallelism code cleanup add f16 support in ggml-amx add amx kernels for QK_K quant formats: Q4_K, Q5_K, Q6_K and IQ4_XS update CMakeList update README fix some compilation warning fix compiler warning when amx is not enabled minor change ggml-ci move ggml_amx_init from ggml.c to ggml-amx/mmq.cpp ggml-ci update CMakeLists with -mamx-tile, -mamx-int8 and -mamx-bf16 ggml-ci add amx as an ggml-backend update header file, the old path for immintrin.h has changed to ggml-cpu-impl.h minor change update CMakeLists.txt minor change apply weight prepacking in set_tensor method in ggml-backend fix compile error ggml-ci minor change ggml-ci update CMakeLists.txt ggml-ci add march dependency minor change ggml-ci change ggml_backend_buffer_is_host to return false for amx backend ggml-ci fix supports_op use device reg for AMX backend ggml-ci minor change ggml-ci minor change fix rebase set .buffer_from_host_ptr to be false for AMX backend
217 lines
6.7 KiB
CMake
217 lines
6.7 KiB
CMake
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
|
|
project("llama.cpp" C CXX)
|
|
include(CheckIncludeFileCXX)
|
|
|
|
#set(CMAKE_WARN_DEPRECATED YES)
|
|
set(CMAKE_WARN_UNUSED_CLI YES)
|
|
|
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|
|
|
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
|
endif()
|
|
|
|
# Add path to modules
|
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
|
|
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
|
|
|
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
|
set(LLAMA_STANDALONE ON)
|
|
|
|
include(git-vars)
|
|
|
|
# configure project version
|
|
# TODO
|
|
else()
|
|
set(LLAMA_STANDALONE OFF)
|
|
endif()
|
|
|
|
if (EMSCRIPTEN)
|
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
|
|
|
option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
|
|
else()
|
|
if (MINGW)
|
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
|
else()
|
|
set(BUILD_SHARED_LIBS_DEFAULT ON)
|
|
endif()
|
|
endif()
|
|
|
|
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
|
|
|
if (WIN32)
|
|
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
|
endif()
|
|
|
|
#
|
|
# option list
|
|
#
|
|
|
|
# debug
|
|
option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
|
|
option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
|
|
|
|
# build
|
|
option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
|
|
|
|
# sanitizers
|
|
option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
|
|
option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
|
|
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
|
|
|
|
# utils
|
|
option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
|
|
|
|
# extra artifacts
|
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
|
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
|
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
|
|
|
|
# 3rd party libs
|
|
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
|
|
|
|
# Required for relocatable CMake package
|
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
|
|
|
# override ggml options
|
|
set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
|
|
set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
|
|
set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
|
|
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
|
|
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
|
|
|
|
# change the default for these ggml options
|
|
if (NOT DEFINED GGML_LLAMAFILE)
|
|
set(GGML_LLAMAFILE_DEFAULT ON)
|
|
endif()
|
|
|
|
if (NOT DEFINED GGML_AMX)
|
|
set(GGML_AMX ON)
|
|
endif()
|
|
|
|
if (NOT DEFINED GGML_CUDA_GRAPHS)
|
|
set(GGML_CUDA_GRAPHS_DEFAULT ON)
|
|
endif()
|
|
|
|
# transition helpers
|
|
function (llama_option_depr TYPE OLD NEW)
|
|
if (${OLD})
|
|
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
|
set(${NEW} ON PARENT_SCOPE)
|
|
endif()
|
|
endfunction()
|
|
|
|
llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
|
|
llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
|
|
llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
|
|
llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
|
|
llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
|
llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
|
|
llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
|
|
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
|
|
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
|
|
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
|
|
|
|
#
|
|
# build the library
|
|
#
|
|
|
|
if (NOT TARGET ggml)
|
|
add_subdirectory(ggml)
|
|
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
|
endif()
|
|
add_subdirectory(src)
|
|
|
|
#
|
|
# install
|
|
#
|
|
|
|
include(GNUInstallDirs)
|
|
include(CMakePackageConfigHelpers)
|
|
|
|
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
|
|
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
|
|
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
|
|
|
|
set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
|
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
|
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
|
|
|
|
|
# At the moment some compile definitions are placed within the ggml/src
|
|
# directory but not exported on the `ggml` target. This could be improved by
|
|
# determining _precisely_ which defines are necessary for the llama-config
|
|
# package.
|
|
#
|
|
set(GGML_TRANSIENT_DEFINES)
|
|
get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
|
|
get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
|
|
if (GGML_DIR_DEFINES)
|
|
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
|
|
endif()
|
|
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
|
|
if (GGML_TARGET_DEFINES)
|
|
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
|
|
endif()
|
|
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
|
|
|
|
set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
|
|
install(TARGETS llama LIBRARY PUBLIC_HEADER)
|
|
|
|
configure_package_config_file(
|
|
${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in
|
|
${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
|
|
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama
|
|
PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
|
|
LLAMA_LIB_INSTALL_DIR
|
|
LLAMA_BIN_INSTALL_DIR )
|
|
|
|
write_basic_package_version_file(
|
|
${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
|
|
VERSION ${LLAMA_INSTALL_VERSION}
|
|
COMPATIBILITY SameMajorVersion)
|
|
|
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
|
|
${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
|
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
|
|
|
|
install(
|
|
FILES convert_hf_to_gguf.py
|
|
PERMISSIONS
|
|
OWNER_READ
|
|
OWNER_WRITE
|
|
OWNER_EXECUTE
|
|
GROUP_READ
|
|
GROUP_EXECUTE
|
|
WORLD_READ
|
|
WORLD_EXECUTE
|
|
DESTINATION ${CMAKE_INSTALL_BINDIR})
|
|
|
|
configure_file(cmake/llama.pc.in
|
|
"${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
|
|
@ONLY)
|
|
|
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
|
|
DESTINATION lib/pkgconfig)
|
|
|
|
#
|
|
# utils, programs, examples and tests
|
|
#
|
|
|
|
if (LLAMA_BUILD_COMMON)
|
|
add_subdirectory(common)
|
|
endif()
|
|
|
|
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
|
include(CTest)
|
|
add_subdirectory(tests)
|
|
endif()
|
|
|
|
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
|
|
add_subdirectory(examples)
|
|
add_subdirectory(pocs)
|
|
endif()
|