From 704359e29985a06a389337a2617b7f3fa8eff908 Mon Sep 17 00:00:00 2001 From: Neuman Vong Date: Thu, 15 Feb 2024 17:11:15 +1100 Subject: [PATCH] vulkan: Find optimal memory type but with fallback (#5381) * @0cc4m feedback * More feedback @0cc4m --- ggml-vulkan.cpp | 65 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 7834e635c..1fad24fd1 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -707,9 +707,21 @@ static void ggml_vk_queue_cleanup(ggml_backend_vk_context * ctx, vk_queue& q) { q.cmd_buffer_idx = 0; } -static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) { +static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) { + for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) { + vk::MemoryType memory_type = mem_props->memoryTypes[i]; + if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) && + (flags & memory_type.propertyFlags) == flags && + mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) { + return static_cast(i); + } + } + return UINT32_MAX; +} + +static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) { #ifdef GGML_VULKAN_DEBUG - std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl; + std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ", " << to_string(fallback_flags) << ")" << std::endl; #endif vk_buffer buf = std::make_shared(); @@ -736,15 +748,15 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz uint32_t memory_type_index = UINT32_MAX; - for (uint32_t i = 0; i < mem_props.memoryTypeCount; ++i) { - vk::MemoryType memory_type = mem_props.memoryTypes[i]; - if ((mem_req.memoryTypeBits & ((uint64_t)1 << i)) && (req_flags & memory_type.propertyFlags) == req_flags && mem_props.memoryHeaps[memory_type.heapIndex].size >= mem_req.size) { - memory_type_index = i; - break; - } + memory_type_index = find_properties(&mem_props, &mem_req, req_flags); + buf->memory_property_flags = req_flags; + + if (memory_type_index == UINT32_MAX && fallback_flags) { + memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags); + buf->memory_property_flags = fallback_flags; } - if (memory_type_index >= mem_props.memoryTypeCount) { + if (memory_type_index == UINT32_MAX) { ctx->device.lock()->device.destroyBuffer(buf->buffer); buf->size = 0; throw vk::OutOfDeviceMemoryError("No suitable memory type found"); @@ -758,10 +770,9 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz buf->size = 0; throw e; } - buf->memory_property_flags = req_flags; buf->ptr = nullptr; - if (req_flags & vk::MemoryPropertyFlagBits::eHostVisible) { + if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { buf->ptr = ctx->device.lock()->device.mapMemory(buf->device_memory, 0, VK_WHOLE_SIZE); } @@ -778,9 +789,9 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz return buf; } -static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) { +static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) { try { - return ggml_vk_create_buffer(ctx, size, req_flags); + return ggml_vk_create_buffer(ctx, size, req_flags, fallback_flags); } catch (const vk::SystemError& e) { std::cerr << "ggml_vulkan: Memory allocation of size " << size << " failed." << std::endl; std::cerr << "ggml_vulkan: " << e.what() << std::endl; @@ -791,16 +802,16 @@ static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size static vk_buffer ggml_vk_create_buffer_device(ggml_backend_vk_context * ctx, size_t size) { vk_buffer buf; try { - buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal); - } catch (const vk::SystemError& e) { if (ctx->device.lock()->uma) { // Fall back to host memory type - buf = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); + buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); } else { - std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl; - std::cerr << "ggml_vulkan: " << e.what() << std::endl; - throw e; + buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal); } + } catch (const vk::SystemError& e) { + std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl; + std::cerr << "ggml_vulkan: " << e.what() << std::endl; + throw e; } return buf; @@ -1422,7 +1433,9 @@ static void * ggml_vk_host_malloc(ggml_backend_vk_context * ctx, size_t size) { #ifdef GGML_VULKAN_DEBUG std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl; #endif - vk_buffer buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + vk_buffer buf = ggml_vk_create_buffer(ctx, size, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) { fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n", @@ -1568,7 +1581,9 @@ static void deferred_memcpy(void * dst, const void * src, size_t size, std::vect static void ggml_vk_ensure_sync_staging_buffer(ggml_backend_vk_context * ctx, size_t size) { if (ctx->sync_staging == nullptr || ctx->sync_staging->size < size) { ggml_vk_destroy_buffer(ctx->sync_staging); - ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); } } @@ -4082,7 +4097,9 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { std::cerr << "ggml_vk_preallocate_buffers(qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl; #endif #if defined(GGML_VULKAN_RUN_TESTS) - ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); ggml_vk_test_transfer(ctx, 8192 * 1000, false); ggml_vk_test_transfer(ctx, 8192 * 1000, true); @@ -4174,7 +4191,9 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { if (ctx->staging != nullptr) { ggml_vk_destroy_buffer(ctx->staging); } - ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached); + ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); } }