diff --git a/ggml/include/ggml-kompute.h b/ggml/include/ggml-kompute.h index 171465456..c0c43521b 100644 --- a/ggml/include/ggml-kompute.h +++ b/ggml/include/ggml-kompute.h @@ -11,6 +11,8 @@ extern "C" { #endif +#define GGML_KOMPUTE_MAX_DEVICES 16 + struct ggml_vk_device { int index; int type; // same as VkPhysicalDeviceType @@ -41,6 +43,8 @@ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend); GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); +GGML_API ggml_backend_reg_t ggml_backend_kompute_reg(void); + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index fd574887f..f397f6252 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -562,6 +562,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na #include "ggml-cann.h" #endif +#ifdef GGML_USE_KOMPUTE +#include "ggml-kompute.h" +#endif + struct ggml_backend_registry { std::vector backends; std::vector devices; @@ -591,8 +595,9 @@ struct ggml_backend_registry { #ifdef GGML_USE_AMX register_backend(ggml_backend_amx_reg()); #endif - - // TODO: kompute +#ifdef GGML_USE_KOMPUTE + register_backend(ggml_backend_kompute_reg()); +#endif register_backend(ggml_backend_cpu_reg()); } diff --git a/ggml/src/ggml-kompute.cpp b/ggml/src/ggml-kompute.cpp index 1f2220234..fea69fb04 100644 --- a/ggml/src/ggml-kompute.cpp +++ b/ggml/src/ggml-kompute.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -273,18 +274,9 @@ static std::vector ggml_vk_available_devices_internal(size_t mem return results; } -// public API returns a C-style array -ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) { - auto devices = ggml_vk_available_devices_internal(memoryRequired); - *count = devices.size(); - if (devices.empty()) { - return nullptr; - } - - size_t nbytes = sizeof (ggml_vk_device) * (devices.size()); - auto * arr = static_cast(malloc(nbytes)); - memcpy(arr, devices.data(), nbytes); - return arr; +static std::vector& ggml_vk_available_devices() { + static std::vector devices = ggml_vk_available_devices_internal(0); + return devices; } static void ggml_vk_filterByVendor(std::vector& devices, const std::string& targetVendor) { @@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() { if (!komputeManager()->hasDevice()) return ggml_vk_device(); - auto devices = ggml_vk_available_devices_internal(0); + auto devices = ggml_vk_available_devices(); ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data()); GGML_ASSERT(!devices.empty()); return devices.front(); @@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) { ggml_vk_cpy(spirv, 2, 4, std::forward(args)...); } -static bool ggml_vk_supports_op(const struct ggml_tensor * op) { - switch (op->type) { - case GGML_TYPE_F16: - case GGML_TYPE_F32: - case GGML_TYPE_Q4_0: - case GGML_TYPE_Q4_1: - break; - default: - return false; - } - +static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { switch (op->op) { case GGML_OP_UNARY: switch (ggml_get_unary_op(op)) { @@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) { ; } return false; + + GGML_UNUSED(dev); } static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) { @@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml any_commands_recorded = true; - if (!ggml_vk_supports_op(dst)) { - fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst)); - GGML_ABORT("unsupported op"); - } - const int32_t ne00 = src0 ? src0->ne[0] : 0; const int32_t ne01 = src0 ? src0->ne[1] : 0; const int32_t ne02 = src0 ? src0->ne[2] : 0; @@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = { }; ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) { - static std::vector bufts = []() { - std::vector vec; - auto devices = ggml_vk_available_devices_internal(0); - vec.reserve(devices.size()); + static std::mutex mutex; + std::lock_guard lock(mutex); - for (const auto & dev : devices) { - vec.push_back({ - /* .iface = */ ggml_backend_kompute_buffer_type_interface, - /* .device = */ nullptr, - /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc) - }); + auto devices = ggml_vk_available_devices(); + int32_t device_count = (int32_t) devices.size(); + GGML_ASSERT(device < device_count); + GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES); + + static ggml_backend_buffer_type + ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES]; + + static bool ggml_backend_kompute_buffer_type_initialized = false; + + if (!ggml_backend_kompute_buffer_type_initialized) { + for (int32_t i = 0; i < device_count; i++) { + ggml_backend_kompute_buffer_types[i] = { + /* .iface = */ ggml_backend_kompute_buffer_type_interface, + /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i), + /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc }, + }; } - return vec; - }(); + ggml_backend_kompute_buffer_type_initialized = true; + } - auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) { - return device == static_cast(t.context)->device; - }); - return it < bufts.end() ? &*it : nullptr; + return &ggml_backend_kompute_buffer_types[device]; } // backend @@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st return GGML_STATUS_SUCCESS; } -static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { - GGML_UNUSED(backend); - return ggml_vk_supports_op(op); -} - -static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) { - GGML_UNUSED(backend); - return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name; -} - static struct ggml_backend_i kompute_backend_i = { /* .get_name = */ ggml_backend_kompute_name, /* .free = */ ggml_backend_kompute_free, @@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) { ggml_backend_t kompute_backend = new ggml_backend { /* .guid = */ ggml_backend_kompute_guid(), /* .interface = */ kompute_backend_i, - /* .device = */ nullptr, + /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device), /* .context = */ s_kompute_context, }; @@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) { bool ggml_backend_is_kompute(ggml_backend_t backend) { return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid()); } + +static size_t ggml_backend_kompute_get_device_count() { + auto devices = ggml_vk_available_devices(); + return devices.size(); +} + +static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) { + auto devices = ggml_vk_available_devices(); + GGML_ASSERT((size_t) device < devices.size()); + snprintf(description, description_size, "%s", devices[device].name); +} + +static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) { + auto devices = ggml_vk_available_devices(); + GGML_ASSERT((size_t) device < devices.size()); + *total = devices[device].heapSize; + *free = devices[device].heapSize; +} + +////////////////////////// + +struct ggml_backend_kompute_device_context { + int device; + std::string name; + std::string description; +}; + +static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) { + ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; + return ctx->name.c_str(); +} + +static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) { + ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; + return ctx->description.c_str(); +} + +static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; + ggml_backend_kompute_get_device_memory(ctx->device, free, total); +} + +static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) { + ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; + return ggml_backend_kompute_buffer_type(ctx->device); +} + +static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { + if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) { + return false; + } + + ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; + ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context; + + return buft_ctx->device == ctx->device; +} + +static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) { + GGML_UNUSED(dev); + return GGML_BACKEND_DEVICE_TYPE_GPU; +} + +static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { + props->name = ggml_backend_kompute_device_get_name(dev); + props->description = ggml_backend_kompute_device_get_description(dev); + props->type = ggml_backend_kompute_device_get_type(dev); + ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total); + props->caps = { + /* async = */ false, + /* host_buffer = */ false, + /* .buffer_from_host_ptr = */ false, + /* events = */ false, + }; +} + +static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) { + GGML_UNUSED(params); + ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; + return ggml_backend_kompute_init(ctx->device); +} + +static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { + const int min_batch_size = 32; + + return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) || + (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID); + + GGML_UNUSED(dev); +} + +static const struct ggml_backend_device_i ggml_backend_kompute_device_i = { + /* .get_name = */ ggml_backend_kompute_device_get_name, + /* .get_description = */ ggml_backend_kompute_device_get_description, + /* .get_memory = */ ggml_backend_kompute_device_get_memory, + /* .get_type = */ ggml_backend_kompute_device_get_type, + /* .get_props = */ ggml_backend_kompute_device_get_props, + /* .init_backend = */ ggml_backend_kompute_device_init, + /* .get_buffer_type = */ ggml_backend_kompute_device_get_buffer_type, + /* .get_host_buffer_type = */ NULL, + /* .buffer_from_host_ptr = */ NULL, + /* .supports_op = */ ggml_backend_kompute_device_supports_op, + /* .supports_buft = */ ggml_backend_kompute_device_supports_buft, + /* .offload_op = */ ggml_backend_kompute_device_offload_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_synchronize = */ NULL, +}; + +static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) { + GGML_UNUSED(reg); + return "Kompute"; +} + +static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) { + GGML_UNUSED(reg); + return ggml_backend_kompute_get_device_count(); +} + +static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) { + static std::vector devices; + + static bool initialized = false; + + { + static std::mutex mutex; + std::lock_guard lock(mutex); + if (!initialized) { + for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) { + ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context; + char desc[256]; + ggml_backend_kompute_get_device_description(i, desc, sizeof(desc)); + ctx->device = i; + ctx->name = "Kompute" + std::to_string(i); + ctx->description = desc; + devices.push_back(new ggml_backend_device { + /* .iface = */ ggml_backend_kompute_device_i, + /* .reg = */ reg, + /* .context = */ ctx, + }); + } + initialized = true; + } + } + + GGML_ASSERT(device < devices.size()); + return devices[device]; +} + +static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = { + /* .get_name = */ ggml_backend_kompute_reg_get_name, + /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count, + /* .get_device = */ ggml_backend_kompute_reg_get_device, + /* .get_proc_address = */ NULL, +}; + +ggml_backend_reg_t ggml_backend_kompute_reg() { + static ggml_backend_reg reg = { + /* .iface = */ ggml_backend_kompute_reg_i, + /* .context = */ nullptr, + }; + + return ® +}