mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-28 21:07:06 +01:00
Completely revamp how we do object management with the Vulkan backend and
stop using so many static objects so we can tear down and bring up Vulkan on new devices in the same runtime.
This commit is contained in:
parent
45c8778b49
commit
b7e2e691d4
185
ggml-vulkan.cpp
185
ggml-vulkan.cpp
@ -65,9 +65,21 @@ struct ggml_kompute_context {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// FIXME: It would be good to consolidate the kompute manager and the kompute context into one object
|
||||||
|
// and consolidate the init functions and simplify object lifetime management. As it currently stands,
|
||||||
|
// we *have* to have the kompute manager no matter what for device discovery, but the kompute context
|
||||||
|
// is only created when a device is set and vulkan is explicitly turned on.
|
||||||
ggml_kompute_context *ggml_kompute_context::instance;
|
ggml_kompute_context *ggml_kompute_context::instance;
|
||||||
|
kp::Manager *komputeManager() {
|
||||||
kp::Manager mgr;
|
static kp::Manager *s_mgr = nullptr;
|
||||||
|
if (s_mgr && !s_mgr->hasInstance()) {
|
||||||
|
delete s_mgr;
|
||||||
|
s_mgr = nullptr;
|
||||||
|
}
|
||||||
|
if (!s_mgr)
|
||||||
|
s_mgr = new kp::Manager;
|
||||||
|
return s_mgr;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
__attribute__((constructor))
|
__attribute__((constructor))
|
||||||
@ -123,12 +135,11 @@ static std::string ggml_vk_getVendorName(uint32_t vendorID) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired) {
|
std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired) {
|
||||||
|
|
||||||
std::vector<ggml_vk_device> results;
|
std::vector<ggml_vk_device> results;
|
||||||
if (!mgr.hasVulkan())
|
if (!komputeManager()->hasVulkan())
|
||||||
return results;
|
return results;
|
||||||
|
|
||||||
std::vector<vk::PhysicalDevice> physicalDevices = mgr.listDevices();
|
std::vector<vk::PhysicalDevice> physicalDevices = komputeManager()->listDevices();
|
||||||
uint32_t deviceCount = physicalDevices.size();
|
uint32_t deviceCount = physicalDevices.size();
|
||||||
|
|
||||||
if (deviceCount == 0)
|
if (deviceCount == 0)
|
||||||
@ -228,22 +239,33 @@ bool ggml_vk_init_device(const ggml_vk_device &device) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ggml_vk_init_device(int device) {
|
bool ggml_vk_init_device(int device) {
|
||||||
mgr.initializeDevice(device, {},
|
komputeManager()->initializeDevice(device, {},
|
||||||
{"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage",
|
{"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage",
|
||||||
"VK_KHR_16bit_storage", "VK_KHR_storage_buffer_storage_class"});
|
"VK_KHR_16bit_storage", "VK_KHR_storage_buffer_storage_class"});
|
||||||
return ggml_vk_has_device();
|
return ggml_vk_has_device();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ggml_vk_free_device() {
|
||||||
|
if (!ggml_vk_has_device())
|
||||||
|
return false;
|
||||||
|
komputeManager()->destroy();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ggml_vk_has_vulkan() {
|
||||||
|
return komputeManager()->hasVulkan();
|
||||||
|
}
|
||||||
|
|
||||||
bool ggml_vk_has_device() {
|
bool ggml_vk_has_device() {
|
||||||
return mgr.hasDevice();
|
return komputeManager()->hasDevice();
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_vk_device ggml_vk_current_device() {
|
ggml_vk_device ggml_vk_current_device() {
|
||||||
if (!mgr.hasDevice())
|
if (!komputeManager()->hasDevice())
|
||||||
return ggml_vk_device();
|
return ggml_vk_device();
|
||||||
|
|
||||||
std::vector<ggml_vk_device> devices = ggml_vk_available_devices(0);
|
std::vector<ggml_vk_device> devices = ggml_vk_available_devices(0);
|
||||||
ggml_vk_filterByName(devices, mgr.physicalDevice()->getProperties().deviceName);
|
ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName);
|
||||||
return devices.front();
|
return devices.front();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -275,7 +297,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t
|
|||||||
descriptorPoolSizes.data());
|
descriptorPoolSizes.data());
|
||||||
|
|
||||||
ctx->pool = std::make_shared<vk::DescriptorPool>();
|
ctx->pool = std::make_shared<vk::DescriptorPool>();
|
||||||
vk::Result r = mgr.device()->createDescriptorPool(
|
vk::Result r = komputeManager()->device()->createDescriptorPool(
|
||||||
&descriptorPoolInfo, nullptr, ctx->pool.get());
|
&descriptorPoolInfo, nullptr, ctx->pool.get());
|
||||||
if (r != vk::Result::eSuccess)
|
if (r != vk::Result::eSuccess)
|
||||||
std::cerr << "Error allocating descriptor pool" << vk::to_string(r);
|
std::cerr << "Error allocating descriptor pool" << vk::to_string(r);
|
||||||
@ -284,7 +306,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t
|
|||||||
static
|
static
|
||||||
void ggml_vk_free_descriptor_pool(struct ggml_kompute_context * ctx) {
|
void ggml_vk_free_descriptor_pool(struct ggml_kompute_context * ctx) {
|
||||||
if (ctx->pool) {
|
if (ctx->pool) {
|
||||||
mgr.device()->destroy(
|
komputeManager()->device()->destroy(
|
||||||
*ctx->pool,
|
*ctx->pool,
|
||||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||||
ctx->pool = nullptr;
|
ctx->pool = nullptr;
|
||||||
@ -301,7 +323,7 @@ vk::Buffer *ggml_vk_allocate_buffer(size_t size) {
|
|||||||
bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive;
|
bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive;
|
||||||
|
|
||||||
vk::Buffer *vkBuffer = new vk::Buffer;
|
vk::Buffer *vkBuffer = new vk::Buffer;
|
||||||
vk::Result r = mgr.device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer);
|
vk::Result r = komputeManager()->device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer);
|
||||||
if (r != vk::Result::eSuccess)
|
if (r != vk::Result::eSuccess)
|
||||||
std::cerr << "Error allocating buffer" << vk::to_string(r);
|
std::cerr << "Error allocating buffer" << vk::to_string(r);
|
||||||
return vkBuffer;
|
return vkBuffer;
|
||||||
@ -312,7 +334,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v
|
|||||||
|
|
||||||
uint32_t memoryTypeIndex = -1;
|
uint32_t memoryTypeIndex = -1;
|
||||||
bool memoryTypeIndexFound = false;
|
bool memoryTypeIndexFound = false;
|
||||||
vk::PhysicalDeviceMemoryProperties memoryProperties = mgr.physicalDevice()->getMemoryProperties();
|
vk::PhysicalDeviceMemoryProperties memoryProperties = komputeManager()->physicalDevice()->getMemoryProperties();
|
||||||
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) {
|
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) {
|
||||||
if (requirements.memoryTypeBits & (1 << i)) {
|
if (requirements.memoryTypeBits & (1 << i)) {
|
||||||
if (((memoryProperties.memoryTypes[i]).propertyFlags &
|
if (((memoryProperties.memoryTypes[i]).propertyFlags &
|
||||||
@ -335,7 +357,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v
|
|||||||
allocInfo.allocationSize = size;
|
allocInfo.allocationSize = size;
|
||||||
allocInfo.memoryTypeIndex = memoryTypeIndex;
|
allocInfo.memoryTypeIndex = memoryTypeIndex;
|
||||||
vk::DeviceMemory *vkDeviceMemory = new vk::DeviceMemory;
|
vk::DeviceMemory *vkDeviceMemory = new vk::DeviceMemory;
|
||||||
vk::Result r = mgr.device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory);
|
vk::Result r = komputeManager()->device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory);
|
||||||
if (r != vk::Result::eSuccess)
|
if (r != vk::Result::eSuccess)
|
||||||
std::cerr << "Error allocating memory" << vk::to_string(r);
|
std::cerr << "Error allocating memory" << vk::to_string(r);
|
||||||
return vkDeviceMemory;
|
return vkDeviceMemory;
|
||||||
@ -346,7 +368,7 @@ size_t ggml_vk_aligned_offset(size_t offset) {
|
|||||||
static size_t minStorageBufferOffsetAlignment = 0;
|
static size_t minStorageBufferOffsetAlignment = 0;
|
||||||
if (minStorageBufferOffsetAlignment == 0) {
|
if (minStorageBufferOffsetAlignment == 0) {
|
||||||
vk::PhysicalDeviceProperties deviceProperties;
|
vk::PhysicalDeviceProperties deviceProperties;
|
||||||
deviceProperties = mgr.physicalDevice()->getProperties();
|
deviceProperties = komputeManager()->physicalDevice()->getProperties();
|
||||||
vk::PhysicalDeviceLimits deviceLimits = deviceProperties.limits;
|
vk::PhysicalDeviceLimits deviceLimits = deviceProperties.limits;
|
||||||
minStorageBufferOffsetAlignment = deviceLimits.minStorageBufferOffsetAlignment;
|
minStorageBufferOffsetAlignment = deviceLimits.minStorageBufferOffsetAlignment;
|
||||||
}
|
}
|
||||||
@ -362,12 +384,12 @@ size_t ggml_vk_aligned_offset(size_t offset) {
|
|||||||
|
|
||||||
static void ggml_vk_h2d_buffer(const ggml_vk_memory &memory) {
|
static void ggml_vk_h2d_buffer(const ggml_vk_memory &memory) {
|
||||||
if (memory.stagingBuffer)
|
if (memory.stagingBuffer)
|
||||||
mgr.sequence()->eval<kp::OpBufferSyncDevice>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
|
komputeManager()->sequence()->eval<kp::OpBufferSyncDevice>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_d2h_buffer(const ggml_vk_memory &memory) {
|
static void ggml_vk_d2h_buffer(const ggml_vk_memory &memory) {
|
||||||
if (memory.stagingBuffer)
|
if (memory.stagingBuffer)
|
||||||
mgr.sequence()->eval<kp::OpBufferSyncLocal>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
|
komputeManager()->sequence()->eval<kp::OpBufferSyncLocal>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_vk_memory ggml_vk_allocate(size_t size) {
|
ggml_vk_memory ggml_vk_allocate(size_t size) {
|
||||||
@ -375,12 +397,12 @@ ggml_vk_memory ggml_vk_allocate(size_t size) {
|
|||||||
bool isHostVisible = false;
|
bool isHostVisible = false;
|
||||||
{
|
{
|
||||||
memory.primaryBuffer = ggml_vk_allocate_buffer(size);
|
memory.primaryBuffer = ggml_vk_allocate_buffer(size);
|
||||||
vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.primaryBuffer);
|
vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.primaryBuffer);
|
||||||
vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal;
|
vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal;
|
||||||
memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
|
memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
|
||||||
mgr.device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0);
|
komputeManager()->device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0);
|
||||||
if (isHostVisible) {
|
if (isHostVisible) {
|
||||||
vk::Result r = mgr.device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
|
vk::Result r = komputeManager()->device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
|
||||||
if (r != vk::Result::eSuccess)
|
if (r != vk::Result::eSuccess)
|
||||||
std::cerr << "Error mapping memory" << vk::to_string(r);
|
std::cerr << "Error mapping memory" << vk::to_string(r);
|
||||||
}
|
}
|
||||||
@ -388,13 +410,13 @@ ggml_vk_memory ggml_vk_allocate(size_t size) {
|
|||||||
|
|
||||||
if (!isHostVisible) {
|
if (!isHostVisible) {
|
||||||
memory.stagingBuffer = ggml_vk_allocate_buffer(size);
|
memory.stagingBuffer = ggml_vk_allocate_buffer(size);
|
||||||
vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.stagingBuffer);
|
vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.stagingBuffer);
|
||||||
vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eHostVisible |
|
vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eHostVisible |
|
||||||
vk::MemoryPropertyFlagBits::eHostCoherent |
|
vk::MemoryPropertyFlagBits::eHostCoherent |
|
||||||
vk::MemoryPropertyFlagBits::eHostCached;
|
vk::MemoryPropertyFlagBits::eHostCached;
|
||||||
memory.stagingMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
|
memory.stagingMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
|
||||||
mgr.device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0);
|
komputeManager()->device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0);
|
||||||
vk::Result r = mgr.device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
|
vk::Result r = komputeManager()->device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
|
||||||
if (r != vk::Result::eSuccess)
|
if (r != vk::Result::eSuccess)
|
||||||
std::cerr << "Error mapping memory" << vk::to_string(r);
|
std::cerr << "Error mapping memory" << vk::to_string(r);
|
||||||
}
|
}
|
||||||
@ -405,19 +427,19 @@ ggml_vk_memory ggml_vk_allocate(size_t size) {
|
|||||||
|
|
||||||
void ggml_vk_free_memory(ggml_vk_memory &memory)
|
void ggml_vk_free_memory(ggml_vk_memory &memory)
|
||||||
{
|
{
|
||||||
mgr.device()->destroy(
|
komputeManager()->device()->destroy(
|
||||||
*memory.primaryBuffer,
|
*memory.primaryBuffer,
|
||||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||||
if (memory.stagingBuffer) {
|
if (memory.stagingBuffer) {
|
||||||
mgr.device()->destroy(
|
komputeManager()->device()->destroy(
|
||||||
*memory.stagingBuffer,
|
*memory.stagingBuffer,
|
||||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||||
}
|
}
|
||||||
mgr.device()->freeMemory(
|
komputeManager()->device()->freeMemory(
|
||||||
*memory.primaryMemory,
|
*memory.primaryMemory,
|
||||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||||
if (memory.stagingMemory) {
|
if (memory.stagingMemory) {
|
||||||
mgr.device()->freeMemory(
|
komputeManager()->device()->freeMemory(
|
||||||
*memory.stagingMemory,
|
*memory.stagingMemory,
|
||||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||||
}
|
}
|
||||||
@ -457,7 +479,7 @@ const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor(struct ggml_kompute_context
|
|||||||
nbytes += *alignedOffset;
|
nbytes += *alignedOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
return mgr.tensor(
|
return komputeManager()->tensor(
|
||||||
t->data,
|
t->data,
|
||||||
nelements,
|
nelements,
|
||||||
nbytes, kp::Tensor::TensorDataTypes::eFloat,
|
nbytes, kp::Tensor::TensorDataTypes::eFloat,
|
||||||
@ -476,7 +498,7 @@ void ggml_vk_add_buffer(
|
|||||||
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t) {
|
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t) {
|
||||||
const auto res = ggml_vk_get_tensor(ctx, t, nullptr);
|
const auto res = ggml_vk_get_tensor(ctx, t, nullptr);
|
||||||
GGML_ASSERT(res);
|
GGML_ASSERT(res);
|
||||||
mgr.sequence()->eval<kp::OpTensorSyncDevice>({res});
|
komputeManager()->sequence()->eval<kp::OpTensorSyncDevice>({res});
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vk_h2d_all(struct ggml_kompute_context * ctx) {
|
void ggml_vk_h2d_all(struct ggml_kompute_context * ctx) {
|
||||||
@ -496,7 +518,7 @@ void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor *
|
|||||||
const auto res = ggml_vk_get_tensor(ctx, t, nullptr);
|
const auto res = ggml_vk_get_tensor(ctx, t, nullptr);
|
||||||
|
|
||||||
GGML_ASSERT(res);
|
GGML_ASSERT(res);
|
||||||
mgr.sequence()->eval<kp::OpTensorSyncLocal>({res});
|
komputeManager()->sequence()->eval<kp::OpTensorSyncLocal>({res});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint32_t> getSpirvShader(const unsigned char* rawData, size_t size) {
|
std::vector<uint32_t> getSpirvShader(const unsigned char* rawData, size_t size) {
|
||||||
@ -537,10 +559,11 @@ void ggml_vk_add(kp::Sequence& seq,
|
|||||||
safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4)
|
safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4)
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -567,10 +590,11 @@ void ggml_vk_addrow(kp::Sequence& seq,
|
|||||||
row
|
row
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -595,10 +619,11 @@ void ggml_vk_mul(kp::Sequence& seq,
|
|||||||
safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4)
|
safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4)
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -625,10 +650,11 @@ void ggml_vk_mulrow(kp::Sequence& seq,
|
|||||||
row
|
row
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -653,10 +679,11 @@ void ggml_vk_scale(kp::Sequence& seq,
|
|||||||
scale
|
scale
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -676,10 +703,11 @@ void ggml_vk_xxlu(const std::vector<uint32_t>& spirv, kp::Sequence& seq,
|
|||||||
safe_divide(inOff, 4), safe_divide(outOff, 4),
|
safe_divide(inOff, 4), safe_divide(outOff, 4),
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -729,10 +757,11 @@ void ggml_vk_soft_max(kp::Sequence& seq,
|
|||||||
ne00, ne01, ne02
|
ne00, ne01, ne02
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
|
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -761,10 +790,11 @@ void ggml_vk_norm_(const std::vector<uint32_t>& spirv, kp::Sequence& seq,
|
|||||||
(uint32_t)ne00, (uint32_t)nb01, epsilon
|
(uint32_t)ne00, (uint32_t)nb01, epsilon
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({(uint32_t)nrows});
|
s_algo->setWorkgroup({(uint32_t)nrows});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -808,10 +838,11 @@ void ggml_vk_diag_mask_inf(kp::Sequence& seq,
|
|||||||
ne00, ne01
|
ne00, ne01
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({unsigned(ne00), unsigned(ne01), unsigned(ne02)});
|
s_algo->setWorkgroup({unsigned(ne00), unsigned(ne01), unsigned(ne02)});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -844,10 +875,11 @@ void ggml_vk_mul_mat_f16(kp::Sequence& seq,
|
|||||||
ne00, nb01, nb02, nb11, nb12, ne0, ne1,
|
ne00, nb01, nb02, nb11, nb12, ne0, ne1,
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11), unsigned(ne12)});
|
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11), unsigned(ne12)});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -871,10 +903,11 @@ void ggml_vk_mul_mat_q4_x(const std::vector<uint32_t>& spirv, uint32_t block_siz
|
|||||||
ne00, ne10, ne0,
|
ne00, ne10, ne0,
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11)});
|
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11)});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -921,10 +954,11 @@ void ggml_vk_get_rows(const std::vector<uint32_t>& spirv,
|
|||||||
ne00, nb01, nb1
|
ne00, nb01, nb1
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({inA, inB, out});
|
s_algo->setTensors({inA, inB, out});
|
||||||
s_algo->setWorkgroup({size});
|
s_algo->setWorkgroup({size});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -996,10 +1030,11 @@ void ggml_vk_rope(kp::Sequence& seq,
|
|||||||
nb0, nb1, nb2, nb3
|
nb0, nb1, nb2, nb3
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
if (!s_algo)
|
if (!komputeManager()->hasAlgorithm(__func__))
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
|
s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
|
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -1032,10 +1067,14 @@ void ggml_vk_cpy(const std::vector<uint32_t>& spirv,
|
|||||||
nb0, nb1, nb2, nb3
|
nb0, nb1, nb2, nb3
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
static std::string unique_name = std::string(__func__) +
|
||||||
if (!s_algo)
|
"_i_" + std::to_string(in_element_size) +
|
||||||
s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
|
"_o_" + std::to_string(out_element_size);
|
||||||
|
std::shared_ptr<kp::Algorithm> s_algo = nullptr;
|
||||||
|
if (!komputeManager()->hasAlgorithm(unique_name))
|
||||||
|
s_algo = komputeManager()->algorithm<float, PushConstants>(unique_name, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
|
||||||
else {
|
else {
|
||||||
|
s_algo = komputeManager()->getAlgorithm(unique_name);
|
||||||
s_algo->setTensors({in, out});
|
s_algo->setTensors({in, out});
|
||||||
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
|
s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
|
||||||
s_algo->setPushConstants<PushConstants>({pushConsts});
|
s_algo->setPushConstants<PushConstants>({pushConsts});
|
||||||
@ -1082,7 +1121,7 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph
|
|||||||
std::vector<std::shared_ptr<kp::Sequence>> sequences(n_seq);
|
std::vector<std::shared_ptr<kp::Sequence>> sequences(n_seq);
|
||||||
|
|
||||||
for (auto& sequence : sequences) {
|
for (auto& sequence : sequences) {
|
||||||
sequence = mgr.sequence();
|
sequence = komputeManager()->sequence();
|
||||||
}
|
}
|
||||||
for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) {
|
for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) {
|
||||||
const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq;
|
const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq;
|
||||||
|
@ -40,6 +40,7 @@ std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);
|
|||||||
bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);
|
bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);
|
||||||
bool ggml_vk_init_device(const ggml_vk_device &device);
|
bool ggml_vk_init_device(const ggml_vk_device &device);
|
||||||
bool ggml_vk_init_device(int device);
|
bool ggml_vk_init_device(int device);
|
||||||
|
bool ggml_vk_free_device();
|
||||||
bool ggml_vk_has_vulkan();
|
bool ggml_vk_has_vulkan();
|
||||||
bool ggml_vk_has_device();
|
bool ggml_vk_has_device();
|
||||||
ggml_vk_device ggml_vk_current_device();
|
ggml_vk_device ggml_vk_current_device();
|
||||||
|
@ -58,18 +58,6 @@ Algorithm::destroy()
|
|||||||
this->mPipeline = nullptr;
|
this->mPipeline = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->mFreePipelineCache && this->mPipelineCache) {
|
|
||||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
|
|
||||||
if (!this->mPipelineCache) {
|
|
||||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
|
||||||
"pipeline cache but it is null");
|
|
||||||
}
|
|
||||||
this->mDevice->destroy(
|
|
||||||
*this->mPipelineCache,
|
|
||||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
|
||||||
this->mPipelineCache = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this->mFreePipelineLayout && this->mPipelineLayout) {
|
if (this->mFreePipelineLayout && this->mPipelineLayout) {
|
||||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
||||||
if (!this->mPipelineLayout) {
|
if (!this->mPipelineLayout) {
|
||||||
@ -317,16 +305,6 @@ Algorithm::createPipeline()
|
|||||||
"main",
|
"main",
|
||||||
&specializationInfo);
|
&specializationInfo);
|
||||||
|
|
||||||
static std::shared_ptr<vk::PipelineCache> globalPipelineCache = std::make_shared<vk::PipelineCache>();
|
|
||||||
if(!*globalPipelineCache) {
|
|
||||||
vk::PipelineCacheCreateInfo pipelineCacheInfo =
|
|
||||||
vk::PipelineCacheCreateInfo();
|
|
||||||
this->mPipelineCache = globalPipelineCache;
|
|
||||||
this->mFreePipelineCache = true;
|
|
||||||
this->mDevice->createPipelineCache(
|
|
||||||
&pipelineCacheInfo, nullptr, globalPipelineCache.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
|
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
|
||||||
shaderStage,
|
shaderStage,
|
||||||
*this->mPipelineLayout,
|
*this->mPipelineLayout,
|
||||||
@ -335,7 +313,7 @@ Algorithm::createPipeline()
|
|||||||
|
|
||||||
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
|
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
|
||||||
vk::ResultValue<vk::Pipeline> pipelineResult =
|
vk::ResultValue<vk::Pipeline> pipelineResult =
|
||||||
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo);
|
this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo);
|
||||||
|
|
||||||
if (pipelineResult.result != vk::Result::eSuccess) {
|
if (pipelineResult.result != vk::Result::eSuccess) {
|
||||||
throw std::runtime_error("Failed to create pipeline result: " +
|
throw std::runtime_error("Failed to create pipeline result: " +
|
||||||
@ -347,7 +325,7 @@ Algorithm::createPipeline()
|
|||||||
this->mFreePipeline = true;
|
this->mFreePipeline = true;
|
||||||
#else
|
#else
|
||||||
vk::Pipeline pipeline =
|
vk::Pipeline pipeline =
|
||||||
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo)
|
this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo)
|
||||||
.value;
|
.value;
|
||||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||||
this->mFreePipeline = true;
|
this->mFreePipeline = true;
|
||||||
|
@ -88,15 +88,14 @@ Manager::destroy()
|
|||||||
this->mManagedSequences.clear();
|
this->mManagedSequences.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->mManageResources && this->mManagedAlgorithms.size()) {
|
if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) {
|
||||||
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
|
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
|
||||||
for (const std::weak_ptr<Algorithm>& weakAlgorithm :
|
for (const auto& kv : this->mManagedAlgorithmsMap) {
|
||||||
this->mManagedAlgorithms) {
|
if (std::shared_ptr<Algorithm> algorithm = kv.second) {
|
||||||
if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
|
|
||||||
algorithm->destroy();
|
algorithm->destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this->mManagedAlgorithms.clear();
|
this->mManagedAlgorithmsMap.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->mManageResources && this->mManagedTensors.size()) {
|
if (this->mManageResources && this->mManagedTensors.size()) {
|
||||||
@ -109,6 +108,18 @@ Manager::destroy()
|
|||||||
this->mManagedTensors.clear();
|
this->mManagedTensors.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this->mPipelineCache) {
|
||||||
|
KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache");
|
||||||
|
if (!this->mPipelineCache) {
|
||||||
|
KP_LOG_WARN("Kompute Manager Error requested to destroy "
|
||||||
|
"pipeline cache but it is null");
|
||||||
|
}
|
||||||
|
this->mDevice->destroy(
|
||||||
|
*this->mPipelineCache,
|
||||||
|
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||||
|
this->mPipelineCache = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
if (this->mFreeDevice) {
|
if (this->mFreeDevice) {
|
||||||
KP_LOG_INFO("Destroying device");
|
KP_LOG_INFO("Destroying device");
|
||||||
this->mDevice->destroy(
|
this->mDevice->destroy(
|
||||||
@ -269,12 +280,14 @@ Manager::clear()
|
|||||||
end(this->mManagedTensors),
|
end(this->mManagedTensors),
|
||||||
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
|
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
|
||||||
end(this->mManagedTensors));
|
end(this->mManagedTensors));
|
||||||
this->mManagedAlgorithms.erase(
|
for (auto it = this->mManagedAlgorithmsMap.begin();
|
||||||
std::remove_if(
|
it != this->mManagedAlgorithmsMap.end();) {
|
||||||
begin(this->mManagedAlgorithms),
|
if (it->second) {
|
||||||
end(this->mManagedAlgorithms),
|
it = this->mManagedAlgorithmsMap.erase(it);
|
||||||
[](std::weak_ptr<Algorithm> t) { return t.expired(); }),
|
} else {
|
||||||
end(this->mManagedAlgorithms));
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
this->mManagedSequences.erase(
|
this->mManagedSequences.erase(
|
||||||
std::remove_if(begin(this->mManagedSequences),
|
std::remove_if(begin(this->mManagedSequences),
|
||||||
end(this->mManagedSequences),
|
end(this->mManagedSequences),
|
||||||
@ -452,6 +465,12 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
|
|||||||
}
|
}
|
||||||
|
|
||||||
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
|
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
|
||||||
|
|
||||||
|
mPipelineCache = std::make_shared<vk::PipelineCache>();
|
||||||
|
vk::PipelineCacheCreateInfo pipelineCacheInfo =
|
||||||
|
vk::PipelineCacheCreateInfo();
|
||||||
|
this->mDevice->createPipelineCache(
|
||||||
|
&pipelineCacheInfo, nullptr, mPipelineCache.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Sequence>
|
std::shared_ptr<Sequence>
|
||||||
|
@ -45,6 +45,7 @@ class Algorithm
|
|||||||
*/
|
*/
|
||||||
template<typename S = float, typename P = float>
|
template<typename S = float, typename P = float>
|
||||||
Algorithm(std::shared_ptr<vk::Device> device,
|
Algorithm(std::shared_ptr<vk::Device> device,
|
||||||
|
vk::PipelineCache *pipelineCache,
|
||||||
vk::DescriptorPool *pool,
|
vk::DescriptorPool *pool,
|
||||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||||
const std::vector<uint32_t>& spirv = {},
|
const std::vector<uint32_t>& spirv = {},
|
||||||
@ -55,6 +56,7 @@ class Algorithm
|
|||||||
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
|
||||||
|
|
||||||
this->mDevice = device;
|
this->mDevice = device;
|
||||||
|
this->mPipelineCache = pipelineCache;
|
||||||
this->mDescriptorPool = pool;
|
this->mDescriptorPool = pool;
|
||||||
|
|
||||||
if (tensors.size() && spirv.size()) {
|
if (tensors.size() && spirv.size()) {
|
||||||
@ -310,8 +312,7 @@ class Algorithm
|
|||||||
bool mFreeShaderModule = false;
|
bool mFreeShaderModule = false;
|
||||||
std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
|
std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
|
||||||
bool mFreePipelineLayout = false;
|
bool mFreePipelineLayout = false;
|
||||||
std::shared_ptr<vk::PipelineCache> mPipelineCache;
|
vk::PipelineCache *mPipelineCache = nullptr;
|
||||||
bool mFreePipelineCache = false;
|
|
||||||
std::shared_ptr<vk::Pipeline> mPipeline;
|
std::shared_ptr<vk::Pipeline> mPipeline;
|
||||||
bool mFreePipeline = false;
|
bool mFreePipeline = false;
|
||||||
|
|
||||||
|
@ -39,6 +39,10 @@ class Manager
|
|||||||
*/
|
*/
|
||||||
~Manager();
|
~Manager();
|
||||||
|
|
||||||
|
// Reports whether mInstance is currently set: the raw pointer returned by
// mInstance.get() is converted to bool, so the result is true for a non-null
// pointer and false otherwise.
bool hasInstance() const {
|
||||||
|
return this->mInstance.get();
|
||||||
|
}
|
||||||
|
|
||||||
bool hasDevice() const {
|
bool hasDevice() const {
|
||||||
return this->mDevice.get();
|
return this->mDevice.get();
|
||||||
}
|
}
|
||||||
@ -149,6 +153,7 @@ class Manager
|
|||||||
* @returns Shared pointer with initialised algorithm
|
* @returns Shared pointer with initialised algorithm
|
||||||
*/
|
*/
|
||||||
std::shared_ptr<Algorithm> algorithm(
|
std::shared_ptr<Algorithm> algorithm(
|
||||||
|
const std::string &name,
|
||||||
vk::DescriptorPool *pool,
|
vk::DescriptorPool *pool,
|
||||||
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
const std::vector<std::shared_ptr<Tensor>>& tensors = {},
|
||||||
const std::vector<uint32_t>& spirv = {},
|
const std::vector<uint32_t>& spirv = {},
|
||||||
@ -157,7 +162,7 @@ class Manager
|
|||||||
const std::vector<float>& pushConstants = {})
|
const std::vector<float>& pushConstants = {})
|
||||||
{
|
{
|
||||||
return this->algorithm<>(
|
return this->algorithm<>(
|
||||||
pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
|
name, pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -176,6 +181,7 @@ class Manager
|
|||||||
*/
|
*/
|
||||||
template<typename S = float, typename P = float>
|
template<typename S = float, typename P = float>
|
||||||
std::shared_ptr<Algorithm> algorithm(
|
std::shared_ptr<Algorithm> algorithm(
|
||||||
|
const std::string &name,
|
||||||
vk::DescriptorPool *pool,
|
vk::DescriptorPool *pool,
|
||||||
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
const std::vector<std::shared_ptr<Tensor>>& tensors,
|
||||||
const std::vector<uint32_t>& spirv,
|
const std::vector<uint32_t>& spirv,
|
||||||
@ -188,6 +194,7 @@ class Manager
|
|||||||
|
|
||||||
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
|
||||||
this->mDevice,
|
this->mDevice,
|
||||||
|
mPipelineCache.get(),
|
||||||
pool,
|
pool,
|
||||||
tensors,
|
tensors,
|
||||||
spirv,
|
spirv,
|
||||||
@ -196,12 +203,24 @@ class Manager
|
|||||||
pushConstants) };
|
pushConstants) };
|
||||||
|
|
||||||
if (this->mManageResources) {
|
if (this->mManageResources) {
|
||||||
this->mManagedAlgorithms.push_back(algorithm);
|
this->mManagedAlgorithmsMap.insert({name, algorithm});
|
||||||
}
|
}
|
||||||
|
|
||||||
return algorithm;
|
return algorithm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when mManagedAlgorithmsMap contains an entry keyed by `name`.
// The lookup is read-only and never inserts.
// NOTE(review): the only insertion visible here is in algorithm(), guarded by
// mManageResources, so this presumably stays false when the manager does not
// manage resources — confirm against the full class.
bool hasAlgorithm(const std::string &name) const {
|
||||||
|
return mManagedAlgorithmsMap.find(name) != mManagedAlgorithmsMap.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Looks up the algorithm stored under `name` in mManagedAlgorithmsMap.
// Returns a copy of the stored shared_ptr when the key exists, or a null
// shared_ptr when it does not; the map itself is never modified.
std::shared_ptr<Algorithm> getAlgorithm(const std::string &name) const {
|
||||||
|
auto it = mManagedAlgorithmsMap.find(name);
|
||||||
|
if (it != mManagedAlgorithmsMap.end()) {
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
// No entry for `name`: callers must check for null before dereferencing.
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Destroy the GPU resources and all managed resources by manager.
|
* Destroy the GPU resources and all managed resources by manager.
|
||||||
**/
|
**/
|
||||||
@ -237,6 +256,7 @@ class Manager
|
|||||||
|
|
||||||
std::shared_ptr<vk::Device> device() const { return mDevice; }
|
std::shared_ptr<vk::Device> device() const { return mDevice; }
|
||||||
std::shared_ptr<vk::PhysicalDevice> physicalDevice() const { return mPhysicalDevice; }
|
std::shared_ptr<vk::PhysicalDevice> physicalDevice() const { return mPhysicalDevice; }
|
||||||
|
std::shared_ptr<vk::PipelineCache> pipelineCache() const { return mPipelineCache; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// -------------- OPTIONALLY OWNED RESOURCES
|
// -------------- OPTIONALLY OWNED RESOURCES
|
||||||
@ -250,10 +270,11 @@ class Manager
|
|||||||
// -------------- ALWAYS OWNED RESOURCES
|
// -------------- ALWAYS OWNED RESOURCES
|
||||||
std::vector<std::weak_ptr<Tensor>> mManagedTensors;
|
std::vector<std::weak_ptr<Tensor>> mManagedTensors;
|
||||||
std::vector<std::weak_ptr<Sequence>> mManagedSequences;
|
std::vector<std::weak_ptr<Sequence>> mManagedSequences;
|
||||||
std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
|
std::unordered_map<std::string, std::shared_ptr<Algorithm>> mManagedAlgorithmsMap;
|
||||||
|
|
||||||
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
std::vector<uint32_t> mComputeQueueFamilyIndices;
|
||||||
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
|
||||||
|
std::shared_ptr<vk::PipelineCache> mPipelineCache;
|
||||||
|
|
||||||
bool mManageResources = false;
|
bool mManageResources = false;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user