From b7e2e691d40ca0a6e8e1e1a9186e16eafde599ae Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Tue, 12 Sep 2023 13:04:55 -0400 Subject: [PATCH] Completely revamp how we do object management with the vulkan backend and stop using so many static objects so we can tear down and bring up vulkan on new devices in the same runtime. --- ggml-vulkan.cpp | 185 +++++++++++++--------- ggml-vulkan.h | 1 + kompute/src/Algorithm.cpp | 26 +-- kompute/src/Manager.cpp | 41 +++-- kompute/src/include/kompute/Algorithm.hpp | 5 +- kompute/src/include/kompute/Manager.hpp | 27 +++- 6 files changed, 172 insertions(+), 113 deletions(-) diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 055b1124d..89de70fa4 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -65,9 +65,21 @@ struct ggml_kompute_context { } }; +// FIXME: It would be good to consolidate the kompute manager and the kompute context into one object +// and consolidate the init functions and simplify object lifetime management. As it currently stands, +// we *have* to have the kompute manager no matter what for device discovery, but the kompute context +// is only created when a device is set and vulkan is explicitly turned on. 
ggml_kompute_context *ggml_kompute_context::instance; - -kp::Manager mgr; +kp::Manager *komputeManager() { + static kp::Manager *s_mgr = nullptr; + if (s_mgr && !s_mgr->hasInstance()) { + delete s_mgr; + s_mgr = nullptr; + } + if (!s_mgr) + s_mgr = new kp::Manager; + return s_mgr; +} #ifdef __linux__ __attribute__((constructor)) @@ -123,12 +135,11 @@ static std::string ggml_vk_getVendorName(uint32_t vendorID) { } std::vector ggml_vk_available_devices(size_t memoryRequired) { - std::vector results; - if (!mgr.hasVulkan()) + if (!komputeManager()->hasVulkan()) return results; - std::vector physicalDevices = mgr.listDevices(); + std::vector physicalDevices = komputeManager()->listDevices(); uint32_t deviceCount = physicalDevices.size(); if (deviceCount == 0) @@ -228,22 +239,33 @@ bool ggml_vk_init_device(const ggml_vk_device &device) { } bool ggml_vk_init_device(int device) { - mgr.initializeDevice(device, {}, + komputeManager()->initializeDevice(device, {}, {"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage", "VK_KHR_16bit_storage", "VK_KHR_storage_buffer_storage_class"}); return ggml_vk_has_device(); } +bool ggml_vk_free_device() { + if (!ggml_vk_has_device()) + return false; + komputeManager()->destroy(); + return true; +} + +bool ggml_vk_has_vulkan() { + return komputeManager()->hasVulkan(); +} + bool ggml_vk_has_device() { - return mgr.hasDevice(); + return komputeManager()->hasDevice(); } ggml_vk_device ggml_vk_current_device() { - if (!mgr.hasDevice()) + if (!komputeManager()->hasDevice()) return ggml_vk_device(); std::vector devices = ggml_vk_available_devices(0); - ggml_vk_filterByName(devices, mgr.physicalDevice()->getProperties().deviceName); + ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName); return devices.front(); } @@ -275,7 +297,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t descriptorPoolSizes.data()); ctx->pool = std::make_shared(); - vk::Result r = 
mgr.device()->createDescriptorPool( + vk::Result r = komputeManager()->device()->createDescriptorPool( &descriptorPoolInfo, nullptr, ctx->pool.get()); if (r != vk::Result::eSuccess) std::cerr << "Error allocating descriptor pool" << vk::to_string(r); @@ -284,7 +306,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t static void ggml_vk_free_descriptor_pool(struct ggml_kompute_context * ctx) { if (ctx->pool) { - mgr.device()->destroy( + komputeManager()->device()->destroy( *ctx->pool, (vk::Optional)nullptr); ctx->pool = nullptr; @@ -301,7 +323,7 @@ vk::Buffer *ggml_vk_allocate_buffer(size_t size) { bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive; vk::Buffer *vkBuffer = new vk::Buffer; - vk::Result r = mgr.device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer); + vk::Result r = komputeManager()->device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer); if (r != vk::Result::eSuccess) std::cerr << "Error allocating buffer" << vk::to_string(r); return vkBuffer; @@ -312,7 +334,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v uint32_t memoryTypeIndex = -1; bool memoryTypeIndexFound = false; - vk::PhysicalDeviceMemoryProperties memoryProperties = mgr.physicalDevice()->getMemoryProperties(); + vk::PhysicalDeviceMemoryProperties memoryProperties = komputeManager()->physicalDevice()->getMemoryProperties(); for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) { if (requirements.memoryTypeBits & (1 << i)) { if (((memoryProperties.memoryTypes[i]).propertyFlags & @@ -335,7 +357,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v allocInfo.allocationSize = size; allocInfo.memoryTypeIndex = memoryTypeIndex; vk::DeviceMemory *vkDeviceMemory = new vk::DeviceMemory; - vk::Result r = mgr.device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory); + vk::Result r = komputeManager()->device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory); if (r 
!= vk::Result::eSuccess) std::cerr << "Error allocating memory" << vk::to_string(r); return vkDeviceMemory; @@ -346,7 +368,7 @@ size_t ggml_vk_aligned_offset(size_t offset) { static size_t minStorageBufferOffsetAlignment = 0; if (minStorageBufferOffsetAlignment == 0) { vk::PhysicalDeviceProperties deviceProperties; - deviceProperties = mgr.physicalDevice()->getProperties(); + deviceProperties = komputeManager()->physicalDevice()->getProperties(); vk::PhysicalDeviceLimits deviceLimits = deviceProperties.limits; minStorageBufferOffsetAlignment = deviceLimits.minStorageBufferOffsetAlignment; } @@ -362,12 +384,12 @@ size_t ggml_vk_aligned_offset(size_t offset) { static void ggml_vk_h2d_buffer(const ggml_vk_memory &memory) { if (memory.stagingBuffer) - mgr.sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); + komputeManager()->sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); } static void ggml_vk_d2h_buffer(const ggml_vk_memory &memory) { if (memory.stagingBuffer) - mgr.sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); + komputeManager()->sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); } ggml_vk_memory ggml_vk_allocate(size_t size) { @@ -375,12 +397,12 @@ ggml_vk_memory ggml_vk_allocate(size_t size) { bool isHostVisible = false; { memory.primaryBuffer = ggml_vk_allocate_buffer(size); - vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.primaryBuffer); + vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.primaryBuffer); vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal; memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible); - mgr.device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0); + komputeManager()->device()->bindBufferMemory(*memory.primaryBuffer, 
*memory.primaryMemory, 0); if (isHostVisible) { - vk::Result r = mgr.device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data); + vk::Result r = komputeManager()->device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data); if (r != vk::Result::eSuccess) std::cerr << "Error mapping memory" << vk::to_string(r); } @@ -388,13 +410,13 @@ ggml_vk_memory ggml_vk_allocate(size_t size) { if (!isHostVisible) { memory.stagingBuffer = ggml_vk_allocate_buffer(size); - vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.stagingBuffer); + vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.stagingBuffer); vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached; memory.stagingMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible); - mgr.device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0); - vk::Result r = mgr.device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data); + komputeManager()->device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0); + vk::Result r = komputeManager()->device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data); if (r != vk::Result::eSuccess) std::cerr << "Error mapping memory" << vk::to_string(r); } @@ -405,19 +427,19 @@ ggml_vk_memory ggml_vk_allocate(size_t size) { void ggml_vk_free_memory(ggml_vk_memory &memory) { - mgr.device()->destroy( + komputeManager()->device()->destroy( *memory.primaryBuffer, (vk::Optional)nullptr); if (memory.stagingBuffer) { - mgr.device()->destroy( + komputeManager()->device()->destroy( *memory.stagingBuffer, (vk::Optional)nullptr); } - mgr.device()->freeMemory( + komputeManager()->device()->freeMemory( 
*memory.primaryMemory, (vk::Optional)nullptr); if (memory.stagingMemory) { - mgr.device()->freeMemory( + komputeManager()->device()->freeMemory( *memory.stagingMemory, (vk::Optional)nullptr); } @@ -457,7 +479,7 @@ const std::shared_ptr ggml_vk_get_tensor(struct ggml_kompute_context nbytes += *alignedOffset; } - return mgr.tensor( + return komputeManager()->tensor( t->data, nelements, nbytes, kp::Tensor::TensorDataTypes::eFloat, @@ -476,7 +498,7 @@ void ggml_vk_add_buffer( void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t) { const auto res = ggml_vk_get_tensor(ctx, t, nullptr); GGML_ASSERT(res); - mgr.sequence()->eval({res}); + komputeManager()->sequence()->eval({res}); } void ggml_vk_h2d_all(struct ggml_kompute_context * ctx) { @@ -496,7 +518,7 @@ void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * const auto res = ggml_vk_get_tensor(ctx, t, nullptr); GGML_ASSERT(res); - mgr.sequence()->eval({res}); + komputeManager()->sequence()->eval({res}); } std::vector getSpirvShader(const unsigned char* rawData, size_t size) { @@ -537,10 +559,11 @@ void ggml_vk_add(kp::Sequence& seq, safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4) }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -567,10 +590,11 @@ void ggml_vk_addrow(kp::Sequence& seq, row }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, 
inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -595,10 +619,11 @@ void ggml_vk_mul(kp::Sequence& seq, safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4) }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -625,10 +650,11 @@ void ggml_vk_mulrow(kp::Sequence& seq, row }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -653,10 +679,11 @@ void ggml_vk_scale(kp::Sequence& seq, scale }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, 
spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -676,10 +703,11 @@ void ggml_vk_xxlu(const std::vector& spirv, kp::Sequence& seq, safe_divide(inOff, 4), safe_divide(outOff, 4), }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -729,10 +757,11 @@ void ggml_vk_soft_max(kp::Sequence& seq, ne00, ne01, ne02 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); s_algo->setPushConstants({pushConsts}); @@ -761,10 +790,11 @@ void ggml_vk_norm_(const std::vector& spirv, kp::Sequence& seq, (uint32_t)ne00, (uint32_t)nb01, epsilon 
}; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({(uint32_t)nrows}); s_algo->setPushConstants({pushConsts}); @@ -808,10 +838,11 @@ void ggml_vk_diag_mask_inf(kp::Sequence& seq, ne00, ne01 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne00), unsigned(ne01), unsigned(ne02)}); s_algo->setPushConstants({pushConsts}); @@ -844,10 +875,11 @@ void ggml_vk_mul_mat_f16(kp::Sequence& seq, ne00, nb01, nb02, nb11, nb12, ne0, ne1, }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts}); else { + s_algo = 
komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11), unsigned(ne12)}); s_algo->setPushConstants({pushConsts}); @@ -871,10 +903,11 @@ void ggml_vk_mul_mat_q4_x(const std::vector& spirv, uint32_t block_siz ne00, ne10, ne0, }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11)}); s_algo->setPushConstants({pushConsts}); @@ -921,10 +954,11 @@ void ggml_vk_get_rows(const std::vector& spirv, ne00, nb01, nb1 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -996,10 +1030,11 @@ void ggml_vk_rope(kp::Sequence& seq, nb0, nb1, nb2, nb3 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) 
+ s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); s_algo->setPushConstants({pushConsts}); @@ -1032,10 +1067,14 @@ void ggml_vk_cpy(const std::vector& spirv, nb0, nb1, nb2, nb3 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); + /* Not static: the cache key must be rebuilt on every call so it tracks this call's element sizes. */ std::string unique_name = std::string(__func__) + "_i_" + std::to_string(in_element_size) + "_o_" + std::to_string(out_element_size); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(unique_name)) + s_algo = komputeManager()->algorithm(unique_name, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(unique_name); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); s_algo->setPushConstants({pushConsts}); @@ -1082,7 +1121,7 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph std::vector> sequences(n_seq); for (auto& sequence : sequences) { - sequence = mgr.sequence(); + sequence = komputeManager()->sequence(); } for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) { const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq; diff --git a/ggml-vulkan.h b/ggml-vulkan.h index d13ed4184..e1d20e388 100644 --- a/ggml-vulkan.h +++ b/ggml-vulkan.h @@ -40,6 +40,7 @@ std::vector ggml_vk_available_devices(size_t memoryRequired); bool ggml_vk_init_device(size_t memoryRequired, const std::string &device); bool ggml_vk_init_device(const ggml_vk_device &device); 
bool ggml_vk_init_device(int device); +bool ggml_vk_free_device(); bool ggml_vk_has_vulkan(); bool ggml_vk_has_device(); ggml_vk_device ggml_vk_current_device(); diff --git a/kompute/src/Algorithm.cpp b/kompute/src/Algorithm.cpp index 9c41ec90f..ea81fd97b 100644 --- a/kompute/src/Algorithm.cpp +++ b/kompute/src/Algorithm.cpp @@ -58,18 +58,6 @@ Algorithm::destroy() this->mPipeline = nullptr; } - if (this->mFreePipelineCache && this->mPipelineCache) { - KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache"); - if (!this->mPipelineCache) { - KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "pipeline cache but it is null"); - } - this->mDevice->destroy( - *this->mPipelineCache, - (vk::Optional)nullptr); - this->mPipelineCache = nullptr; - } - if (this->mFreePipelineLayout && this->mPipelineLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout"); if (!this->mPipelineLayout) { @@ -317,16 +305,6 @@ Algorithm::createPipeline() "main", &specializationInfo); - static std::shared_ptr globalPipelineCache = std::make_shared(); - if(!*globalPipelineCache) { - vk::PipelineCacheCreateInfo pipelineCacheInfo = - vk::PipelineCacheCreateInfo(); - this->mPipelineCache = globalPipelineCache; - this->mFreePipelineCache = true; - this->mDevice->createPipelineCache( - &pipelineCacheInfo, nullptr, globalPipelineCache.get()); - } - vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, *this->mPipelineLayout, @@ -335,7 +313,7 @@ Algorithm::createPipeline() #ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE vk::ResultValue pipelineResult = - this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo); + this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo); if (pipelineResult.result != vk::Result::eSuccess) { throw std::runtime_error("Failed to create pipeline result: " + @@ -347,7 +325,7 @@ Algorithm::createPipeline() this->mFreePipeline = true; #else vk::Pipeline pipeline = - 
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo) + this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo) .value; this->mPipeline = std::make_shared(pipeline); this->mFreePipeline = true; diff --git a/kompute/src/Manager.cpp b/kompute/src/Manager.cpp index 2c86b6e10..2a02b7b10 100644 --- a/kompute/src/Manager.cpp +++ b/kompute/src/Manager.cpp @@ -88,15 +88,14 @@ Manager::destroy() this->mManagedSequences.clear(); } - if (this->mManageResources && this->mManagedAlgorithms.size()) { + if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); - for (const std::weak_ptr& weakAlgorithm : - this->mManagedAlgorithms) { - if (std::shared_ptr algorithm = weakAlgorithm.lock()) { + for (const auto& kv : this->mManagedAlgorithmsMap) { + if (std::shared_ptr algorithm = kv.second) { algorithm->destroy(); } } - this->mManagedAlgorithms.clear(); + this->mManagedAlgorithmsMap.clear(); } if (this->mManageResources && this->mManagedTensors.size()) { @@ -109,6 +108,18 @@ Manager::destroy() this->mManagedTensors.clear(); } + if (this->mPipelineCache) { + KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache"); + if (!this->mPipelineCache) { + KP_LOG_WARN("Kompute Manager Error requested to destroy " + "pipeline cache but it is null"); + } + this->mDevice->destroy( + *this->mPipelineCache, + (vk::Optional)nullptr); + this->mPipelineCache = nullptr; + } + if (this->mFreeDevice) { KP_LOG_INFO("Destroying device"); this->mDevice->destroy( @@ -269,12 +280,14 @@ Manager::clear() end(this->mManagedTensors), [](std::weak_ptr t) { return t.expired(); }), end(this->mManagedTensors)); - this->mManagedAlgorithms.erase( - std::remove_if( - begin(this->mManagedAlgorithms), - end(this->mManagedAlgorithms), - [](std::weak_ptr t) { return t.expired(); }), - end(this->mManagedAlgorithms)); + for (auto it = this->mManagedAlgorithmsMap.begin(); + it != this->mManagedAlgorithmsMap.end();) { + 
if (it->second) { + it = this->mManagedAlgorithmsMap.erase(it); + } else { + ++it; + } + } this->mManagedSequences.erase( std::remove_if(begin(this->mManagedSequences), end(this->mManagedSequences), @@ -452,6 +465,12 @@ Manager::createDevice(const std::vector& familyQueueIndices, } KP_LOG_DEBUG("Kompute Manager compute queue obtained"); + + mPipelineCache = std::make_shared(); + vk::PipelineCacheCreateInfo pipelineCacheInfo = + vk::PipelineCacheCreateInfo(); + this->mDevice->createPipelineCache( + &pipelineCacheInfo, nullptr, mPipelineCache.get()); } std::shared_ptr diff --git a/kompute/src/include/kompute/Algorithm.hpp b/kompute/src/include/kompute/Algorithm.hpp index 90fe48fef..ef11234ee 100644 --- a/kompute/src/include/kompute/Algorithm.hpp +++ b/kompute/src/include/kompute/Algorithm.hpp @@ -45,6 +45,7 @@ class Algorithm */ template Algorithm(std::shared_ptr device, + vk::PipelineCache *pipelineCache, vk::DescriptorPool *pool, const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -55,6 +56,7 @@ class Algorithm KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; + this->mPipelineCache = pipelineCache; this->mDescriptorPool = pool; if (tensors.size() && spirv.size()) { @@ -310,8 +312,7 @@ class Algorithm bool mFreeShaderModule = false; std::shared_ptr mPipelineLayout; bool mFreePipelineLayout = false; - std::shared_ptr mPipelineCache; - bool mFreePipelineCache = false; + vk::PipelineCache *mPipelineCache = nullptr; std::shared_ptr mPipeline; bool mFreePipeline = false; diff --git a/kompute/src/include/kompute/Manager.hpp b/kompute/src/include/kompute/Manager.hpp index 42336f4e8..e910b2b81 100644 --- a/kompute/src/include/kompute/Manager.hpp +++ b/kompute/src/include/kompute/Manager.hpp @@ -39,6 +39,10 @@ class Manager */ ~Manager(); + bool hasInstance() const { + return this->mInstance.get(); + } + bool hasDevice() const { return this->mDevice.get(); } @@ -149,6 +153,7 @@ class Manager * @returns Shared pointer with 
initialised algorithm */ std::shared_ptr algorithm( + const std::string &name, vk::DescriptorPool *pool, const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -157,7 +162,7 @@ class Manager const std::vector& pushConstants = {}) { return this->algorithm<>( - pool, tensors, spirv, workgroup, specializationConstants, pushConstants); + name, pool, tensors, spirv, workgroup, specializationConstants, pushConstants); } /** @@ -176,6 +181,7 @@ class Manager */ template std::shared_ptr algorithm( + const std::string &name, vk::DescriptorPool *pool, const std::vector>& tensors, const std::vector& spirv, @@ -188,6 +194,7 @@ class Manager std::shared_ptr algorithm{ new kp::Algorithm( this->mDevice, + mPipelineCache.get(), pool, tensors, spirv, @@ -196,12 +203,24 @@ class Manager pushConstants) }; if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); + this->mManagedAlgorithmsMap.insert({name, algorithm}); } return algorithm; } + bool hasAlgorithm(const std::string &name) const { + return mManagedAlgorithmsMap.find(name) != mManagedAlgorithmsMap.end(); + } + + std::shared_ptr getAlgorithm(const std::string &name) const { + auto it = mManagedAlgorithmsMap.find(name); + if (it != mManagedAlgorithmsMap.end()) { + return it->second; + } + return nullptr; + } + /** * Destroy the GPU resources and all managed resources by manager. 
**/ @@ -237,6 +256,7 @@ class Manager std::shared_ptr device() const { return mDevice; } std::shared_ptr physicalDevice() const { return mPhysicalDevice; } + std::shared_ptr pipelineCache() const { return mPipelineCache; } private: // -------------- OPTIONALLY OWNED RESOURCES @@ -250,10 +270,11 @@ class Manager // -------------- ALWAYS OWNED RESOURCES std::vector> mManagedTensors; std::vector> mManagedSequences; - std::vector> mManagedAlgorithms; + std::unordered_map> mManagedAlgorithmsMap; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; + std::shared_ptr mPipelineCache; bool mManageResources = false;