diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 055b1124d..89de70fa4 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -65,9 +65,21 @@ struct ggml_kompute_context { } }; +// FIXME: It would be good to consolidate the kompute manager and the kompute context into one object +// and consolidate the init functions and simplify object lifetime management. As it currently stands, +// we *have* to have the kompute manager no matter what for device discovery, but the kompute context +// is only created when a device is set and vulkan is explicitly turned on. ggml_kompute_context *ggml_kompute_context::instance; - -kp::Manager mgr; +kp::Manager *komputeManager() { + static kp::Manager *s_mgr = nullptr; + if (s_mgr && !s_mgr->hasInstance()) { + delete s_mgr; + s_mgr = nullptr; + } + if (!s_mgr) + s_mgr = new kp::Manager; + return s_mgr; +} #ifdef __linux__ __attribute__((constructor)) @@ -123,12 +135,11 @@ static std::string ggml_vk_getVendorName(uint32_t vendorID) { } std::vector ggml_vk_available_devices(size_t memoryRequired) { - std::vector results; - if (!mgr.hasVulkan()) + if (!komputeManager()->hasVulkan()) return results; - std::vector physicalDevices = mgr.listDevices(); + std::vector physicalDevices = komputeManager()->listDevices(); uint32_t deviceCount = physicalDevices.size(); if (deviceCount == 0) @@ -228,22 +239,33 @@ bool ggml_vk_init_device(const ggml_vk_device &device) { } bool ggml_vk_init_device(int device) { - mgr.initializeDevice(device, {}, + komputeManager()->initializeDevice(device, {}, {"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage", "VK_KHR_16bit_storage", "VK_KHR_storage_buffer_storage_class"}); return ggml_vk_has_device(); } +bool ggml_vk_free_device() { + if (!ggml_vk_has_device()) + return false; + komputeManager()->destroy(); + return true; +} + +bool ggml_vk_has_vulkan() { + return komputeManager()->hasVulkan(); +} + bool ggml_vk_has_device() { - return mgr.hasDevice(); + return komputeManager()->hasDevice(); } ggml_vk_device 
ggml_vk_current_device() { - if (!mgr.hasDevice()) + if (!komputeManager()->hasDevice()) return ggml_vk_device(); std::vector devices = ggml_vk_available_devices(0); - ggml_vk_filterByName(devices, mgr.physicalDevice()->getProperties().deviceName); + ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName); return devices.front(); } @@ -275,7 +297,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t descriptorPoolSizes.data()); ctx->pool = std::make_shared(); - vk::Result r = mgr.device()->createDescriptorPool( + vk::Result r = komputeManager()->device()->createDescriptorPool( &descriptorPoolInfo, nullptr, ctx->pool.get()); if (r != vk::Result::eSuccess) std::cerr << "Error allocating descriptor pool" << vk::to_string(r); @@ -284,7 +306,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t static void ggml_vk_free_descriptor_pool(struct ggml_kompute_context * ctx) { if (ctx->pool) { - mgr.device()->destroy( + komputeManager()->device()->destroy( *ctx->pool, (vk::Optional)nullptr); ctx->pool = nullptr; @@ -301,7 +323,7 @@ vk::Buffer *ggml_vk_allocate_buffer(size_t size) { bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive; vk::Buffer *vkBuffer = new vk::Buffer; - vk::Result r = mgr.device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer); + vk::Result r = komputeManager()->device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer); if (r != vk::Result::eSuccess) std::cerr << "Error allocating buffer" << vk::to_string(r); return vkBuffer; @@ -312,7 +334,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v uint32_t memoryTypeIndex = -1; bool memoryTypeIndexFound = false; - vk::PhysicalDeviceMemoryProperties memoryProperties = mgr.physicalDevice()->getMemoryProperties(); + vk::PhysicalDeviceMemoryProperties memoryProperties = komputeManager()->physicalDevice()->getMemoryProperties(); for (uint32_t i = 0; i < 
memoryProperties.memoryTypeCount; i++) { if (requirements.memoryTypeBits & (1 << i)) { if (((memoryProperties.memoryTypes[i]).propertyFlags & @@ -335,7 +357,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v allocInfo.allocationSize = size; allocInfo.memoryTypeIndex = memoryTypeIndex; vk::DeviceMemory *vkDeviceMemory = new vk::DeviceMemory; - vk::Result r = mgr.device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory); + vk::Result r = komputeManager()->device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory); if (r != vk::Result::eSuccess) std::cerr << "Error allocating memory" << vk::to_string(r); return vkDeviceMemory; @@ -346,7 +368,7 @@ size_t ggml_vk_aligned_offset(size_t offset) { static size_t minStorageBufferOffsetAlignment = 0; if (minStorageBufferOffsetAlignment == 0) { vk::PhysicalDeviceProperties deviceProperties; - deviceProperties = mgr.physicalDevice()->getProperties(); + deviceProperties = komputeManager()->physicalDevice()->getProperties(); vk::PhysicalDeviceLimits deviceLimits = deviceProperties.limits; minStorageBufferOffsetAlignment = deviceLimits.minStorageBufferOffsetAlignment; } @@ -362,12 +384,12 @@ size_t ggml_vk_aligned_offset(size_t offset) { static void ggml_vk_h2d_buffer(const ggml_vk_memory &memory) { if (memory.stagingBuffer) - mgr.sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); + komputeManager()->sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); } static void ggml_vk_d2h_buffer(const ggml_vk_memory &memory) { if (memory.stagingBuffer) - mgr.sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); + komputeManager()->sequence()->eval(memory.primaryBuffer, memory.stagingBuffer, memory.size); } ggml_vk_memory ggml_vk_allocate(size_t size) { @@ -375,12 +397,12 @@ ggml_vk_memory ggml_vk_allocate(size_t size) { bool isHostVisible = false; { memory.primaryBuffer = ggml_vk_allocate_buffer(size); - vk::MemoryRequirements 
memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.primaryBuffer); + vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.primaryBuffer); vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal; memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible); - mgr.device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0); + komputeManager()->device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0); if (isHostVisible) { - vk::Result r = mgr.device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data); + vk::Result r = komputeManager()->device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data); if (r != vk::Result::eSuccess) std::cerr << "Error mapping memory" << vk::to_string(r); } @@ -388,13 +410,13 @@ ggml_vk_memory ggml_vk_allocate(size_t size) { if (!isHostVisible) { memory.stagingBuffer = ggml_vk_allocate_buffer(size); - vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.stagingBuffer); + vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.stagingBuffer); vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached; memory.stagingMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible); - mgr.device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0); - vk::Result r = mgr.device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data); + komputeManager()->device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0); + vk::Result r = komputeManager()->device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), 
&memory.data); if (r != vk::Result::eSuccess) std::cerr << "Error mapping memory" << vk::to_string(r); } @@ -405,19 +427,19 @@ ggml_vk_memory ggml_vk_allocate(size_t size) { void ggml_vk_free_memory(ggml_vk_memory &memory) { - mgr.device()->destroy( + komputeManager()->device()->destroy( *memory.primaryBuffer, (vk::Optional)nullptr); if (memory.stagingBuffer) { - mgr.device()->destroy( + komputeManager()->device()->destroy( *memory.stagingBuffer, (vk::Optional)nullptr); } - mgr.device()->freeMemory( + komputeManager()->device()->freeMemory( *memory.primaryMemory, (vk::Optional)nullptr); if (memory.stagingMemory) { - mgr.device()->freeMemory( + komputeManager()->device()->freeMemory( *memory.stagingMemory, (vk::Optional)nullptr); } @@ -457,7 +479,7 @@ const std::shared_ptr ggml_vk_get_tensor(struct ggml_kompute_context nbytes += *alignedOffset; } - return mgr.tensor( + return komputeManager()->tensor( t->data, nelements, nbytes, kp::Tensor::TensorDataTypes::eFloat, @@ -476,7 +498,7 @@ void ggml_vk_add_buffer( void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t) { const auto res = ggml_vk_get_tensor(ctx, t, nullptr); GGML_ASSERT(res); - mgr.sequence()->eval({res}); + komputeManager()->sequence()->eval({res}); } void ggml_vk_h2d_all(struct ggml_kompute_context * ctx) { @@ -496,7 +518,7 @@ void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * const auto res = ggml_vk_get_tensor(ctx, t, nullptr); GGML_ASSERT(res); - mgr.sequence()->eval({res}); + komputeManager()->sequence()->eval({res}); } std::vector getSpirvShader(const unsigned char* rawData, size_t size) { @@ -537,10 +559,11 @@ void ggml_vk_add(kp::Sequence& seq, safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4) }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if 
(!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -567,10 +590,11 @@ void ggml_vk_addrow(kp::Sequence& seq, row }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -595,10 +619,11 @@ void ggml_vk_mul(kp::Sequence& seq, safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4) }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -625,10 +650,11 @@ void ggml_vk_mulrow(kp::Sequence& seq, row }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if 
(!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -653,10 +679,11 @@ void ggml_vk_scale(kp::Sequence& seq, scale }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -676,10 +703,11 @@ void ggml_vk_xxlu(const std::vector& spirv, kp::Sequence& seq, safe_divide(inOff, 4), safe_divide(outOff, 4), }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -729,10 +757,11 @@ void ggml_vk_soft_max(kp::Sequence& seq, ne00, ne01, ne02 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); + std::shared_ptr s_algo = 
nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); s_algo->setPushConstants({pushConsts}); @@ -761,10 +790,11 @@ void ggml_vk_norm_(const std::vector& spirv, kp::Sequence& seq, (uint32_t)ne00, (uint32_t)nb01, epsilon }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({(uint32_t)nrows}); s_algo->setPushConstants({pushConsts}); @@ -808,10 +838,11 @@ void ggml_vk_diag_mask_inf(kp::Sequence& seq, ne00, ne01 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne00), unsigned(ne01), unsigned(ne02)}); s_algo->setPushConstants({pushConsts}); @@ -844,10 +875,11 @@ void ggml_vk_mul_mat_f16(kp::Sequence& seq, ne00, 
nb01, nb02, nb11, nb12, ne0, ne1, }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11), unsigned(ne12)}); s_algo->setPushConstants({pushConsts}); @@ -871,10 +903,11 @@ void ggml_vk_mul_mat_q4_x(const std::vector& spirv, uint32_t block_siz ne00, ne10, ne0, }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11)}); s_algo->setPushConstants({pushConsts}); @@ -921,10 +954,11 @@ void ggml_vk_get_rows(const std::vector& spirv, ne00, nb01, nb1 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); 
else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({inA, inB, out}); s_algo->setWorkgroup({size}); s_algo->setPushConstants({pushConsts}); @@ -996,10 +1030,11 @@ void ggml_vk_rope(kp::Sequence& seq, nb0, nb1, nb2, nb3 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(__func__)) + s_algo = komputeManager()->algorithm(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(__func__); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); s_algo->setPushConstants({pushConsts}); @@ -1032,10 +1067,14 @@ void ggml_vk_cpy(const std::vector& spirv, nb0, nb1, nb2, nb3 }; - static std::shared_ptr s_algo = nullptr; - if (!s_algo) - s_algo = mgr.algorithm(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); + const std::string unique_name = std::string(__func__) + + "_i_" + std::to_string(in_element_size) + + "_o_" + std::to_string(out_element_size); + std::shared_ptr s_algo = nullptr; + if (!komputeManager()->hasAlgorithm(unique_name)) + s_algo = komputeManager()->algorithm(unique_name, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); else { + s_algo = komputeManager()->getAlgorithm(unique_name); s_algo->setTensors({in, out}); s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); s_algo->setPushConstants({pushConsts}); @@ -1082,7 +1121,7 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph std::vector> sequences(n_seq); for
(auto& sequence : sequences) { - sequence = mgr.sequence(); + sequence = komputeManager()->sequence(); } for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) { const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq; diff --git a/ggml-vulkan.h b/ggml-vulkan.h index d13ed4184..e1d20e388 100644 --- a/ggml-vulkan.h +++ b/ggml-vulkan.h @@ -40,6 +40,7 @@ std::vector ggml_vk_available_devices(size_t memoryRequired); bool ggml_vk_init_device(size_t memoryRequired, const std::string &device); bool ggml_vk_init_device(const ggml_vk_device &device); bool ggml_vk_init_device(int device); +bool ggml_vk_free_device(); bool ggml_vk_has_vulkan(); bool ggml_vk_has_device(); ggml_vk_device ggml_vk_current_device(); diff --git a/kompute/src/Algorithm.cpp b/kompute/src/Algorithm.cpp index 9c41ec90f..ea81fd97b 100644 --- a/kompute/src/Algorithm.cpp +++ b/kompute/src/Algorithm.cpp @@ -58,18 +58,6 @@ Algorithm::destroy() this->mPipeline = nullptr; } - if (this->mFreePipelineCache && this->mPipelineCache) { - KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache"); - if (!this->mPipelineCache) { - KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "pipeline cache but it is null"); - } - this->mDevice->destroy( - *this->mPipelineCache, - (vk::Optional)nullptr); - this->mPipelineCache = nullptr; - } - if (this->mFreePipelineLayout && this->mPipelineLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout"); if (!this->mPipelineLayout) { @@ -317,16 +305,6 @@ Algorithm::createPipeline() "main", &specializationInfo); - static std::shared_ptr globalPipelineCache = std::make_shared(); - if(!*globalPipelineCache) { - vk::PipelineCacheCreateInfo pipelineCacheInfo = - vk::PipelineCacheCreateInfo(); - this->mPipelineCache = globalPipelineCache; - this->mFreePipelineCache = true; - this->mDevice->createPipelineCache( - &pipelineCacheInfo, nullptr, globalPipelineCache.get()); - } - vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(), shaderStage, 
*this->mPipelineLayout, @@ -335,7 +313,7 @@ Algorithm::createPipeline() #ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE vk::ResultValue pipelineResult = - this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo); + this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo); if (pipelineResult.result != vk::Result::eSuccess) { throw std::runtime_error("Failed to create pipeline result: " + @@ -347,7 +325,7 @@ Algorithm::createPipeline() this->mFreePipeline = true; #else vk::Pipeline pipeline = - this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo) + this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo) .value; this->mPipeline = std::make_shared(pipeline); this->mFreePipeline = true; diff --git a/kompute/src/Manager.cpp b/kompute/src/Manager.cpp index 2c86b6e10..2a02b7b10 100644 --- a/kompute/src/Manager.cpp +++ b/kompute/src/Manager.cpp @@ -88,15 +88,14 @@ Manager::destroy() this->mManagedSequences.clear(); } - if (this->mManageResources && this->mManagedAlgorithms.size()) { + if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); - for (const std::weak_ptr& weakAlgorithm : - this->mManagedAlgorithms) { - if (std::shared_ptr algorithm = weakAlgorithm.lock()) { + for (const auto& kv : this->mManagedAlgorithmsMap) { + if (std::shared_ptr algorithm = kv.second) { algorithm->destroy(); } } - this->mManagedAlgorithms.clear(); + this->mManagedAlgorithmsMap.clear(); } if (this->mManageResources && this->mManagedTensors.size()) { @@ -109,6 +108,18 @@ Manager::destroy() this->mManagedTensors.clear(); } + if (this->mPipelineCache) { + KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache"); + if (!this->mPipelineCache) { + KP_LOG_WARN("Kompute Manager Error requested to destroy " + "pipeline cache but it is null"); + } + this->mDevice->destroy( + *this->mPipelineCache, + (vk::Optional)nullptr); + this->mPipelineCache = nullptr; + } + if 
(this->mFreeDevice) { KP_LOG_INFO("Destroying device"); this->mDevice->destroy( @@ -269,12 +280,14 @@ Manager::clear() end(this->mManagedTensors), [](std::weak_ptr t) { return t.expired(); }), end(this->mManagedTensors)); - this->mManagedAlgorithms.erase( - std::remove_if( - begin(this->mManagedAlgorithms), - end(this->mManagedAlgorithms), - [](std::weak_ptr t) { return t.expired(); }), - end(this->mManagedAlgorithms)); + for (auto it = this->mManagedAlgorithmsMap.begin(); + it != this->mManagedAlgorithmsMap.end();) { + if (it->second) { + it = this->mManagedAlgorithmsMap.erase(it); + } else { + ++it; + } + } this->mManagedSequences.erase( std::remove_if(begin(this->mManagedSequences), end(this->mManagedSequences), @@ -452,6 +465,12 @@ Manager::createDevice(const std::vector& familyQueueIndices, } KP_LOG_DEBUG("Kompute Manager compute queue obtained"); + + mPipelineCache = std::make_shared(); + vk::PipelineCacheCreateInfo pipelineCacheInfo = + vk::PipelineCacheCreateInfo(); + this->mDevice->createPipelineCache( + &pipelineCacheInfo, nullptr, mPipelineCache.get()); } std::shared_ptr diff --git a/kompute/src/include/kompute/Algorithm.hpp b/kompute/src/include/kompute/Algorithm.hpp index 90fe48fef..ef11234ee 100644 --- a/kompute/src/include/kompute/Algorithm.hpp +++ b/kompute/src/include/kompute/Algorithm.hpp @@ -45,6 +45,7 @@ class Algorithm */ template Algorithm(std::shared_ptr device, + vk::PipelineCache *pipelineCache, vk::DescriptorPool *pool, const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -55,6 +56,7 @@ class Algorithm KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; + this->mPipelineCache = pipelineCache; this->mDescriptorPool = pool; if (tensors.size() && spirv.size()) { @@ -310,8 +312,7 @@ class Algorithm bool mFreeShaderModule = false; std::shared_ptr mPipelineLayout; bool mFreePipelineLayout = false; - std::shared_ptr mPipelineCache; - bool mFreePipelineCache = false; + vk::PipelineCache *mPipelineCache 
= nullptr; std::shared_ptr mPipeline; bool mFreePipeline = false; diff --git a/kompute/src/include/kompute/Manager.hpp b/kompute/src/include/kompute/Manager.hpp index 42336f4e8..e910b2b81 100644 --- a/kompute/src/include/kompute/Manager.hpp +++ b/kompute/src/include/kompute/Manager.hpp @@ -39,6 +39,10 @@ class Manager */ ~Manager(); + bool hasInstance() const { + return this->mInstance.get(); + } + bool hasDevice() const { return this->mDevice.get(); } @@ -149,6 +153,7 @@ class Manager * @returns Shared pointer with initialised algorithm */ std::shared_ptr algorithm( + const std::string &name, vk::DescriptorPool *pool, const std::vector>& tensors = {}, const std::vector& spirv = {}, @@ -157,7 +162,7 @@ class Manager const std::vector& pushConstants = {}) { return this->algorithm<>( - pool, tensors, spirv, workgroup, specializationConstants, pushConstants); + name, pool, tensors, spirv, workgroup, specializationConstants, pushConstants); } /** @@ -176,6 +181,7 @@ class Manager */ template std::shared_ptr algorithm( + const std::string &name, vk::DescriptorPool *pool, const std::vector>& tensors, const std::vector& spirv, @@ -188,6 +194,7 @@ class Manager std::shared_ptr algorithm{ new kp::Algorithm( this->mDevice, + mPipelineCache.get(), pool, tensors, spirv, @@ -196,12 +203,24 @@ class Manager pushConstants) }; if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); + this->mManagedAlgorithmsMap.insert({name, algorithm}); } return algorithm; } + bool hasAlgorithm(const std::string &name) const { + return mManagedAlgorithmsMap.find(name) != mManagedAlgorithmsMap.end(); + } + + std::shared_ptr getAlgorithm(const std::string &name) const { + auto it = mManagedAlgorithmsMap.find(name); + if (it != mManagedAlgorithmsMap.end()) { + return it->second; + } + return nullptr; + } + /** * Destroy the GPU resources and all managed resources by manager. 
**/ @@ -237,6 +256,7 @@ class Manager std::shared_ptr device() const { return mDevice; } std::shared_ptr physicalDevice() const { return mPhysicalDevice; } + std::shared_ptr pipelineCache() const { return mPipelineCache; } private: // -------------- OPTIONALLY OWNED RESOURCES @@ -250,10 +270,11 @@ class Manager // -------------- ALWAYS OWNED RESOURCES std::vector> mManagedTensors; std::vector> mManagedSequences; - std::vector> mManagedAlgorithms; + std::unordered_map> mManagedAlgorithmsMap; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; + std::shared_ptr mPipelineCache; bool mManageResources = false;