mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-08 03:36:53 +01:00
3420909dff
* ggml : automatic selection of best CPU backend * amx : minor opt * add GGML_AVX_VNNI to enable avx-vnni, fix checks
520 lines
14 KiB
C++
520 lines
14 KiB
C++
#include "ggml-backend-impl.h"
|
|
#include "ggml-backend.h"
|
|
#include "ggml-impl.h"
|
|
#include <algorithm>
|
|
#include <codecvt>
|
|
#include <cstring>
|
|
#include <filesystem>
|
|
#include <locale>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <type_traits>
|
|
#include <vector>
|
|
|
|
#ifdef _WIN32
|
|
# define WIN32_LEAN_AND_MEAN
|
|
# ifndef NOMINMAX
|
|
# define NOMINMAX
|
|
# endif
|
|
# include <windows.h>
|
|
#elif defined(__APPLE__)
|
|
# include <mach-o/dyld.h>
|
|
# include <dlfcn.h>
|
|
#else
|
|
# include <dlfcn.h>
|
|
# include <unistd.h>
|
|
#endif
|
|
|
|
// Backend registry
|
|
#ifdef GGML_USE_CPU
|
|
#include "ggml-cpu.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_CUDA
|
|
#include "ggml-cuda.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_METAL
|
|
#include "ggml-metal.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_SYCL
|
|
#include "ggml-sycl.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_VULKAN
|
|
#include "ggml-vulkan.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_BLAS
|
|
#include "ggml-blas.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_RPC
|
|
#include "ggml-rpc.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_CANN
|
|
#include "ggml-cann.h"
|
|
#endif
|
|
|
|
#ifdef GGML_USE_KOMPUTE
|
|
#include "ggml-kompute.h"
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
|
|
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
|
|
struct dl_handle_deleter {
|
|
void operator()(HMODULE handle) {
|
|
FreeLibrary(handle);
|
|
}
|
|
};
|
|
|
|
static dl_handle * dl_load_library(const std::wstring & path) {
|
|
// suppress error dialogs for missing DLLs
|
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
|
|
HMODULE handle = LoadLibraryW(path.c_str());
|
|
|
|
SetErrorMode(old_mode);
|
|
|
|
return handle;
|
|
}
|
|
|
|
static dl_handle * dl_load_library(const std::string & path) {
|
|
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
return dl_load_library(converter.from_bytes(path));
|
|
}
|
|
|
|
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
|
|
void * p = (void *) GetProcAddress(handle, name);
|
|
|
|
SetErrorMode(old_mode);
|
|
|
|
return p;
|
|
}
|
|
|
|
#else
|
|
|
|
using dl_handle = void;
|
|
|
|
struct dl_handle_deleter {
|
|
void operator()(void * handle) {
|
|
dlclose(handle);
|
|
}
|
|
};
|
|
|
|
static void * dl_load_library(const std::string & path) {
|
|
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
|
|
return handle;
|
|
}
|
|
|
|
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
return dlsym(handle, name);
|
|
}
|
|
|
|
#endif
|
|
|
|
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
|
|
|
struct ggml_backend_reg_entry {
|
|
ggml_backend_reg_t reg;
|
|
dl_handle_ptr handle;
|
|
};
|
|
|
|
struct ggml_backend_registry {
|
|
std::vector<ggml_backend_reg_entry> backends;
|
|
std::vector<ggml_backend_dev_t> devices;
|
|
|
|
ggml_backend_registry() {
|
|
#ifdef GGML_USE_CUDA
|
|
register_backend(ggml_backend_cuda_reg());
|
|
#endif
|
|
#ifdef GGML_USE_METAL
|
|
register_backend(ggml_backend_metal_reg());
|
|
#endif
|
|
#ifdef GGML_USE_SYCL
|
|
register_backend(ggml_backend_sycl_reg());
|
|
#endif
|
|
#ifdef GGML_USE_VULKAN
|
|
register_backend(ggml_backend_vk_reg());
|
|
#endif
|
|
#ifdef GGML_USE_CANN
|
|
register_backend(ggml_backend_cann_reg());
|
|
#endif
|
|
#ifdef GGML_USE_BLAS
|
|
register_backend(ggml_backend_blas_reg());
|
|
#endif
|
|
#ifdef GGML_USE_RPC
|
|
register_backend(ggml_backend_rpc_reg());
|
|
#endif
|
|
#ifdef GGML_USE_KOMPUTE
|
|
register_backend(ggml_backend_kompute_reg());
|
|
#endif
|
|
#ifdef GGML_USE_CPU
|
|
register_backend(ggml_backend_cpu_reg());
|
|
#endif
|
|
}
|
|
|
|
~ggml_backend_registry() {
|
|
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
|
// since backend threads may still be running and accessing resources from the dynamic library
|
|
for (auto & entry : backends) {
|
|
if (entry.handle) {
|
|
entry.handle.release(); // NOLINT
|
|
}
|
|
}
|
|
}
|
|
|
|
void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
|
if (!reg) {
|
|
return;
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
|
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
|
#endif
|
|
backends.push_back({ reg, std::move(handle) });
|
|
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
|
register_device(ggml_backend_reg_dev_get(reg, i));
|
|
}
|
|
}
|
|
|
|
void register_device(ggml_backend_dev_t device) {
|
|
#ifndef NDEBUG
|
|
GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
|
|
#endif
|
|
devices.push_back(device);
|
|
}
|
|
|
|
ggml_backend_reg_t load_backend(const char * path, bool silent) {
|
|
dl_handle_ptr handle { dl_load_library(path) };
|
|
if (!handle) {
|
|
if (!silent) {
|
|
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
if (score_fn && score_fn() == 0) {
|
|
if (!silent) {
|
|
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
|
if (!backend_init_fn) {
|
|
if (!silent) {
|
|
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
ggml_backend_reg_t reg = backend_init_fn();
|
|
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
|
if (!silent) {
|
|
if (!reg) {
|
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
|
|
} else {
|
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
|
__func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
|
|
|
|
register_backend(reg, std::move(handle));
|
|
|
|
return reg;
|
|
}
|
|
|
|
void unload_backend(ggml_backend_reg_t reg, bool silent) {
|
|
auto it = std::find_if(backends.begin(), backends.end(),
|
|
[reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
|
|
|
if (it == backends.end()) {
|
|
if (!silent) {
|
|
GGML_LOG_ERROR("%s: backend not found\n", __func__);
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (!silent) {
|
|
GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
|
|
}
|
|
|
|
// remove devices
|
|
devices.erase(
|
|
std::remove_if(devices.begin(), devices.end(),
|
|
[reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
|
|
devices.end());
|
|
|
|
// remove backend
|
|
backends.erase(it);
|
|
}
|
|
};
|
|
|
|
static ggml_backend_registry & get_reg() {
|
|
static ggml_backend_registry reg;
|
|
return reg;
|
|
}
|
|
|
|
// Internal API
|
|
void ggml_backend_register(ggml_backend_reg_t reg) {
|
|
get_reg().register_backend(reg);
|
|
}
|
|
|
|
void ggml_backend_device_register(ggml_backend_dev_t device) {
|
|
get_reg().register_device(device);
|
|
}
|
|
|
|
// Backend (reg) enumeration
|
|
static bool striequals(const char * a, const char * b) {
|
|
for (; *a && *b; a++, b++) {
|
|
if (std::tolower(*a) != std::tolower(*b)) {
|
|
return false;
|
|
}
|
|
}
|
|
return *a == *b;
|
|
}
|
|
|
|
size_t ggml_backend_reg_count() {
|
|
return get_reg().backends.size();
|
|
}
|
|
|
|
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
|
|
GGML_ASSERT(index < ggml_backend_reg_count());
|
|
return get_reg().backends[index].reg;
|
|
}
|
|
|
|
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
|
|
for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
|
|
ggml_backend_reg_t reg = ggml_backend_reg_get(i);
|
|
if (striequals(ggml_backend_reg_name(reg), name)) {
|
|
return reg;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Device enumeration
|
|
size_t ggml_backend_dev_count() {
|
|
return get_reg().devices.size();
|
|
}
|
|
|
|
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
|
|
GGML_ASSERT(index < ggml_backend_dev_count());
|
|
return get_reg().devices[index];
|
|
}
|
|
|
|
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
|
|
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
|
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
|
if (striequals(ggml_backend_dev_name(dev), name)) {
|
|
return dev;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
|
|
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
|
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
|
if (ggml_backend_dev_type(dev) == type) {
|
|
return dev;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Convenience functions
|
|
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
|
|
ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
|
|
if (!dev) {
|
|
return nullptr;
|
|
}
|
|
return ggml_backend_dev_init(dev, params);
|
|
}
|
|
|
|
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
|
|
ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
|
|
if (!dev) {
|
|
return nullptr;
|
|
}
|
|
return ggml_backend_dev_init(dev, params);
|
|
}
|
|
|
|
ggml_backend_t ggml_backend_init_best(void) {
|
|
ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
|
|
if (!dev) {
|
|
dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
|
}
|
|
if (!dev) {
|
|
return nullptr;
|
|
}
|
|
return ggml_backend_dev_init(dev, nullptr);
|
|
}
|
|
|
|
// Dynamic loading
|
|
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
|
return get_reg().load_backend(path, false);
|
|
}
|
|
|
|
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
|
get_reg().unload_backend(reg, true);
|
|
}
|
|
|
|
static std::string get_executable_path() {
|
|
#if defined(__APPLE__)
|
|
// get executable path
|
|
std::vector<char> path;
|
|
uint32_t size;
|
|
while (true) {
|
|
size = path.size();
|
|
if (_NSGetExecutablePath(path.data(), &size) == 0) {
|
|
break;
|
|
}
|
|
path.resize(size);
|
|
}
|
|
std::string base_path(path.data(), size);
|
|
// remove executable name
|
|
auto last_slash = base_path.find_last_of('/');
|
|
if (last_slash != std::string::npos) {
|
|
base_path = base_path.substr(0, last_slash);
|
|
}
|
|
return base_path + "/";
|
|
#elif defined(__linux__)
|
|
std::string base_path = ".";
|
|
std::vector<char> path(1024);
|
|
while (true) {
|
|
// get executable path
|
|
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
|
|
if (len == -1) {
|
|
break;
|
|
}
|
|
if (len < (ssize_t) path.size()) {
|
|
base_path = std::string(path.data(), len);
|
|
// remove executable name
|
|
auto last_slash = base_path.find_last_of('/');
|
|
if (last_slash != std::string::npos) {
|
|
base_path = base_path.substr(0, last_slash);
|
|
}
|
|
break;
|
|
}
|
|
path.resize(path.size() * 2);
|
|
}
|
|
|
|
return base_path + "/";
|
|
#elif defined(_WIN32)
|
|
std::vector<char> path(MAX_PATH);
|
|
DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
|
|
if (len == 0) {
|
|
return "";
|
|
}
|
|
std::string base_path(path.data(), len);
|
|
// remove executable name
|
|
auto last_slash = base_path.find_last_of('\\');
|
|
if (last_slash != std::string::npos) {
|
|
base_path = base_path.substr(0, last_slash);
|
|
}
|
|
return base_path + "\\";
|
|
#endif
|
|
}
|
|
|
|
static std::string backend_filename_prefix() {
|
|
#ifdef _WIN32
|
|
return "ggml-";
|
|
#else
|
|
return "libggml-";
|
|
#endif
|
|
}
|
|
|
|
static std::string backend_filename_suffix() {
|
|
#ifdef _WIN32
|
|
return ".dll";
|
|
#else
|
|
return ".so";
|
|
#endif
|
|
}
|
|
|
|
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent) {
|
|
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
|
// TODO: search system paths
|
|
std::vector<std::string> search_paths = { "./", get_executable_path() };
|
|
std::string file_prefix = backend_filename_prefix() + name + "-";
|
|
|
|
int best_score = 0;
|
|
std::string best_path;
|
|
|
|
namespace fs = std::filesystem;
|
|
for (const auto & search_path : search_paths) {
|
|
if (!fs::exists(search_path)) {
|
|
continue;
|
|
}
|
|
for (const auto & entry : fs::directory_iterator(search_path)) {
|
|
if (entry.is_regular_file()) {
|
|
std::string filename = entry.path().filename().string();
|
|
std::string ext = entry.path().extension().string();
|
|
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
|
dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
|
|
if (!handle && !silent) {
|
|
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
|
}
|
|
if (handle) {
|
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
if (score_fn) {
|
|
int s = score_fn();
|
|
#ifndef NDEBUG
|
|
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
|
#endif
|
|
if (s > best_score) {
|
|
best_score = s;
|
|
best_path = entry.path().string();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_score == 0) {
|
|
// try to load the base backend
|
|
for (const auto & search_path : search_paths) {
|
|
std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
|
|
if (fs::exists(path)) {
|
|
return get_reg().load_backend(path.c_str(), silent);
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
return get_reg().load_backend(best_path.c_str(), silent);
|
|
}
|
|
|
|
void ggml_backend_load_all() {
|
|
ggml_backend_load_best("blas", true);
|
|
ggml_backend_load_best("cann", true);
|
|
ggml_backend_load_best("cuda", true);
|
|
ggml_backend_load_best("hip", true);
|
|
ggml_backend_load_best("kompute", true);
|
|
ggml_backend_load_best("metal", true);
|
|
ggml_backend_load_best("rpc", true);
|
|
ggml_backend_load_best("sycl", true);
|
|
ggml_backend_load_best("vulkan", true);
|
|
ggml_backend_load_best("musa", true);
|
|
ggml_backend_load_best("cpu", true);
|
|
}
|