diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 055b1124d..89de70fa4 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -65,9 +65,21 @@ struct ggml_kompute_context {
     }
 };
 
+// FIXME: It would be good to consolidate the kompute manager and the kompute context into one object
+// and consolidate the init functions and simplify object lifetime management. As it currently stands,
+// we *have* to have the kompute manager no matter what for device discovery, but the kompute context
+// is only created when a device is set and vulkan is explicitly turned on.
 ggml_kompute_context *ggml_kompute_context::instance;
-
-kp::Manager mgr;
+// Returns the shared kompute manager, replacing it with a fresh one once the
+// current manager no longer reports an instance (hasInstance() is false).
+kp::Manager *komputeManager() {
+    static kp::Manager *s_mgr = nullptr;
+    if (s_mgr != nullptr && s_mgr->hasInstance())
+        return s_mgr;
+    delete s_mgr; // stale manager, or nullptr on first use (deleting nullptr is a no-op)
+    s_mgr = nullptr; // keep the slot empty in case construction below throws
+    return s_mgr = new kp::Manager;
+}
 
 #ifdef __linux__
 __attribute__((constructor))
@@ -123,12 +135,11 @@ static std::string ggml_vk_getVendorName(uint32_t vendorID) {
 }
 
 std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired) {
-
     std::vector<ggml_vk_device> results;
-    if (!mgr.hasVulkan())
+    if (!komputeManager()->hasVulkan())
         return results;
 
-    std::vector<vk::PhysicalDevice> physicalDevices = mgr.listDevices();
+    std::vector<vk::PhysicalDevice> physicalDevices = komputeManager()->listDevices();
     uint32_t deviceCount = physicalDevices.size();
 
     if (deviceCount == 0)
@@ -228,22 +239,33 @@ bool ggml_vk_init_device(const ggml_vk_device &device) {
 }
 
 bool ggml_vk_init_device(int device) {
-    mgr.initializeDevice(device, {},
+    komputeManager()->initializeDevice(device, {},
                          {"VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage",
                           "VK_KHR_16bit_storage", "VK_KHR_storage_buffer_storage_class"});
     return ggml_vk_has_device();
 }
 
+// Calls Manager::destroy() when a device is active; returns whether one was.
+bool ggml_vk_free_device() {
+    const bool hadDevice = ggml_vk_has_device();
+    if (hadDevice) komputeManager()->destroy();
+    return hadDevice;
+}
+
+// Forwards to Manager::hasVulkan() on the shared kompute manager; unlike
+// ggml_vk_has_device() it does not require a device to be initialized.
+bool ggml_vk_has_vulkan() { return komputeManager()->hasVulkan(); }
+
 bool ggml_vk_has_device() {
-    return mgr.hasDevice();
+    return komputeManager()->hasDevice();
 }
 
 ggml_vk_device ggml_vk_current_device() {
-    if (!mgr.hasDevice())
+    if (!komputeManager()->hasDevice())
         return ggml_vk_device();
 
     std::vector<ggml_vk_device> devices = ggml_vk_available_devices(0);
-    ggml_vk_filterByName(devices, mgr.physicalDevice()->getProperties().deviceName);
+    ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName);
     return devices.front();
 }
 
@@ -275,7 +297,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t
       descriptorPoolSizes.data());
 
     ctx->pool = std::make_shared<vk::DescriptorPool>();
-    vk::Result r = mgr.device()->createDescriptorPool(
+    vk::Result r = komputeManager()->device()->createDescriptorPool(
       &descriptorPoolInfo, nullptr, ctx->pool.get());
     if (r != vk::Result::eSuccess)
         std::cerr << "Error allocating descriptor pool" << vk::to_string(r);
@@ -284,7 +306,7 @@ void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t
 static
 void ggml_vk_free_descriptor_pool(struct ggml_kompute_context * ctx) {
     if (ctx->pool) {
-        mgr.device()->destroy(
+        komputeManager()->device()->destroy(
           *ctx->pool,
           (vk::Optional<const vk::AllocationCallbacks>)nullptr);
         ctx->pool = nullptr;
@@ -301,7 +323,7 @@ vk::Buffer *ggml_vk_allocate_buffer(size_t size) {
     bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive;
 
     vk::Buffer *vkBuffer = new vk::Buffer;
-    vk::Result r = mgr.device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer);
+    vk::Result r = komputeManager()->device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer);
     if (r != vk::Result::eSuccess)
         std::cerr << "Error allocating buffer" << vk::to_string(r);
     return vkBuffer;
@@ -312,7 +334,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v
 
     uint32_t memoryTypeIndex = -1;
     bool memoryTypeIndexFound = false;
-    vk::PhysicalDeviceMemoryProperties memoryProperties = mgr.physicalDevice()->getMemoryProperties();
+    vk::PhysicalDeviceMemoryProperties memoryProperties = komputeManager()->physicalDevice()->getMemoryProperties();
     for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) {
         if (requirements.memoryTypeBits & (1 << i)) {
             if (((memoryProperties.memoryTypes[i]).propertyFlags &
@@ -335,7 +357,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v
     allocInfo.allocationSize = size;
     allocInfo.memoryTypeIndex = memoryTypeIndex;
     vk::DeviceMemory *vkDeviceMemory =  new vk::DeviceMemory;
-    vk::Result r = mgr.device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory);
+    vk::Result r = komputeManager()->device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory);
     if (r != vk::Result::eSuccess)
         std::cerr << "Error allocating memory" << vk::to_string(r);
     return vkDeviceMemory;
@@ -346,7 +368,7 @@ size_t ggml_vk_aligned_offset(size_t offset) {
     static size_t minStorageBufferOffsetAlignment = 0;
     if (minStorageBufferOffsetAlignment == 0) {
         vk::PhysicalDeviceProperties deviceProperties;
-        deviceProperties = mgr.physicalDevice()->getProperties();
+        deviceProperties = komputeManager()->physicalDevice()->getProperties();
         vk::PhysicalDeviceLimits deviceLimits = deviceProperties.limits;
         minStorageBufferOffsetAlignment = deviceLimits.minStorageBufferOffsetAlignment;
     }
@@ -362,12 +384,12 @@ size_t ggml_vk_aligned_offset(size_t offset) {
 
 static void ggml_vk_h2d_buffer(const ggml_vk_memory &memory) {
     if (memory.stagingBuffer)
-        mgr.sequence()->eval<kp::OpBufferSyncDevice>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
+        komputeManager()->sequence()->eval<kp::OpBufferSyncDevice>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
 }
 
 static void ggml_vk_d2h_buffer(const ggml_vk_memory &memory) {
     if (memory.stagingBuffer)
-        mgr.sequence()->eval<kp::OpBufferSyncLocal>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
+        komputeManager()->sequence()->eval<kp::OpBufferSyncLocal>(memory.primaryBuffer, memory.stagingBuffer, memory.size);
 }
 
 ggml_vk_memory ggml_vk_allocate(size_t size) {
@@ -375,12 +397,12 @@ ggml_vk_memory ggml_vk_allocate(size_t size) {
     bool isHostVisible = false;
     {
         memory.primaryBuffer = ggml_vk_allocate_buffer(size);
-        vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.primaryBuffer);
+        vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.primaryBuffer);
         vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal;
         memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
-        mgr.device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0);
+        komputeManager()->device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0);
         if (isHostVisible) {
-            vk::Result r = mgr.device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
+            vk::Result r = komputeManager()->device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
             if (r != vk::Result::eSuccess)
                 std::cerr << "Error mapping memory" << vk::to_string(r);
         }
@@ -388,13 +410,13 @@ ggml_vk_memory ggml_vk_allocate(size_t size) {
 
     if (!isHostVisible) {
         memory.stagingBuffer = ggml_vk_allocate_buffer(size);
-        vk::MemoryRequirements memoryRequirements = mgr.device()->getBufferMemoryRequirements(*memory.stagingBuffer);
+        vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.stagingBuffer);
         vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eHostVisible |
                                                       vk::MemoryPropertyFlagBits::eHostCoherent |
                                                       vk::MemoryPropertyFlagBits::eHostCached;
         memory.stagingMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
-        mgr.device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0);
-        vk::Result r = mgr.device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
+        komputeManager()->device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0);
+        vk::Result r = komputeManager()->device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
         if (r != vk::Result::eSuccess)
             std::cerr << "Error mapping memory" << vk::to_string(r);
     }
@@ -405,19 +427,19 @@ ggml_vk_memory ggml_vk_allocate(size_t size) {
 
 void ggml_vk_free_memory(ggml_vk_memory &memory)
 {
-    mgr.device()->destroy(
+    komputeManager()->device()->destroy(
       *memory.primaryBuffer,
       (vk::Optional<const vk::AllocationCallbacks>)nullptr);
     if (memory.stagingBuffer) {
-        mgr.device()->destroy(
+        komputeManager()->device()->destroy(
           *memory.stagingBuffer,
           (vk::Optional<const vk::AllocationCallbacks>)nullptr);
     }
-    mgr.device()->freeMemory(
+    komputeManager()->device()->freeMemory(
       *memory.primaryMemory,
       (vk::Optional<const vk::AllocationCallbacks>)nullptr);
     if (memory.stagingMemory) {
-        mgr.device()->freeMemory(
+        komputeManager()->device()->freeMemory(
           *memory.stagingMemory,
           (vk::Optional<const vk::AllocationCallbacks>)nullptr);
     }
@@ -457,7 +479,7 @@ const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor(struct ggml_kompute_context
         nbytes += *alignedOffset;
     }
 
-    return mgr.tensor(
+    return komputeManager()->tensor(
         t->data,
         nelements,
         nbytes, kp::Tensor::TensorDataTypes::eFloat,
@@ -476,7 +498,7 @@ void ggml_vk_add_buffer(
 void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t) {
     const auto res = ggml_vk_get_tensor(ctx, t, nullptr);
     GGML_ASSERT(res);
-    mgr.sequence()->eval<kp::OpTensorSyncDevice>({res});
+    komputeManager()->sequence()->eval<kp::OpTensorSyncDevice>({res});
 }
 
 void ggml_vk_h2d_all(struct ggml_kompute_context * ctx) {
@@ -496,7 +518,7 @@ void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor *
     const auto res = ggml_vk_get_tensor(ctx, t, nullptr);
 
     GGML_ASSERT(res);
-    mgr.sequence()->eval<kp::OpTensorSyncLocal>({res});
+    komputeManager()->sequence()->eval<kp::OpTensorSyncLocal>({res});
 }
 
 std::vector<uint32_t> getSpirvShader(const unsigned char* rawData, size_t size) {
@@ -537,10 +559,11 @@ void ggml_vk_add(kp::Sequence& seq,
         safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4)
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -567,10 +590,11 @@ void ggml_vk_addrow(kp::Sequence& seq,
         row
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -595,10 +619,11 @@ void ggml_vk_mul(kp::Sequence& seq,
         safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4)
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -625,10 +650,11 @@ void ggml_vk_mulrow(kp::Sequence& seq,
         row
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -653,10 +679,11 @@ void ggml_vk_scale(kp::Sequence& seq,
         scale
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -676,10 +703,11 @@ void ggml_vk_xxlu(const std::vector<uint32_t>& spirv, kp::Sequence& seq,
         safe_divide(inOff, 4), safe_divide(outOff, 4),
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr; // fetched from / registered with the manager under this function's name
+    if (!komputeManager()->hasAlgorithm(__func__)) // NOTE(review): the key is only the function name although `spirv` is a parameter; a call with a different shader reuses the first-built algorithm - confirm or extend the key
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -729,10 +757,11 @@ void ggml_vk_soft_max(kp::Sequence& seq,
         ne00, ne01, ne02
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -761,10 +790,11 @@ void ggml_vk_norm_(const std::vector<uint32_t>& spirv, kp::Sequence& seq,
         (uint32_t)ne00, (uint32_t)nb01, epsilon
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr; // fetched from / registered with the manager under this function's name
+    if (!komputeManager()->hasAlgorithm(__func__)) // NOTE(review): the key is only the function name although `spirv` is a parameter; a call with a different shader reuses the first-built algorithm - confirm or extend the key
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({(uint32_t)nrows});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -808,10 +838,11 @@ void ggml_vk_diag_mask_inf(kp::Sequence& seq,
         ne00, ne01
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({unsigned(ne00), unsigned(ne01), unsigned(ne02)});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -844,10 +875,11 @@ void ggml_vk_mul_mat_f16(kp::Sequence& seq,
         ne00, nb01, nb02, nb11, nb12, ne0, ne1,
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11), unsigned(ne12)}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11), unsigned(ne12)});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -871,10 +903,11 @@ void ggml_vk_mul_mat_q4_x(const std::vector<uint32_t>& spirv, uint32_t block_siz
         ne00, ne10, ne0,
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr; // fetched from / registered with the manager under this function's name
+    if (!komputeManager()->hasAlgorithm(__func__)) // NOTE(review): the key is only the function name although `spirv` is a parameter; a call with a different shader reuses the first-built algorithm - confirm or extend the key
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne11)}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({unsigned(ne01), unsigned(ne11)});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -921,10 +954,11 @@ void ggml_vk_get_rows(const std::vector<uint32_t>& spirv,
         ne00, nb01, nb1
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr; // fetched from / registered with the manager under this function's name
+    if (!komputeManager()->hasAlgorithm(__func__)) // NOTE(review): the key is only the function name although `spirv` is a parameter; a call with a different shader reuses the first-built algorithm - confirm or extend the key
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
         s_algo->setWorkgroup({size});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -996,10 +1030,11 @@ void ggml_vk_rope(kp::Sequence& seq,
         nb0, nb1, nb2, nb3
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(__func__))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(__func__, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -1032,10 +1067,14 @@ void ggml_vk_cpy(const std::vector<uint32_t>& spirv,
         nb0, nb1, nb2, nb3
     };
 
-    static std::shared_ptr<kp::Algorithm> s_algo = nullptr;
-    if (!s_algo)
-        s_algo = mgr.algorithm<float, PushConstants>(ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
+    // Built per call (not static) so the key follows this call's element sizes and each size pair gets its own algorithm.
+    const std::string unique_name = std::string(__func__) + "_i_" + std::to_string(in_element_size) +
+                                    "_o_" + std::to_string(out_element_size);
+    std::shared_ptr<kp::Algorithm> s_algo = nullptr;
+    if (!komputeManager()->hasAlgorithm(unique_name))
+        s_algo = komputeManager()->algorithm<float, PushConstants>(unique_name, ggml_kompute_context::instance->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts});
     else {
+        s_algo = komputeManager()->getAlgorithm(unique_name);
         s_algo->setTensors({in, out});
         s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)});
         s_algo->setPushConstants<PushConstants>({pushConsts});
@@ -1082,7 +1121,7 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph
     std::vector<std::shared_ptr<kp::Sequence>> sequences(n_seq);
 
     for (auto& sequence : sequences) {
-        sequence = mgr.sequence();
+        sequence = komputeManager()->sequence();
     }
     for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) {
         const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq;
diff --git a/ggml-vulkan.h b/ggml-vulkan.h
index d13ed4184..e1d20e388 100644
--- a/ggml-vulkan.h
+++ b/ggml-vulkan.h
@@ -40,6 +40,7 @@ std::vector<ggml_vk_device> ggml_vk_available_devices(size_t memoryRequired);
 bool ggml_vk_init_device(size_t memoryRequired, const std::string &device);
 bool ggml_vk_init_device(const ggml_vk_device &device);
 bool ggml_vk_init_device(int device);
+bool ggml_vk_free_device();
 bool ggml_vk_has_vulkan();
 bool ggml_vk_has_device();
 ggml_vk_device ggml_vk_current_device();
diff --git a/kompute/src/Algorithm.cpp b/kompute/src/Algorithm.cpp
index 9c41ec90f..ea81fd97b 100644
--- a/kompute/src/Algorithm.cpp
+++ b/kompute/src/Algorithm.cpp
@@ -58,18 +58,6 @@ Algorithm::destroy()
         this->mPipeline = nullptr;
     }
 
-    if (this->mFreePipelineCache && this->mPipelineCache) {
-        KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
-        if (!this->mPipelineCache) {
-            KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
-                        "pipeline cache but it is null");
-        }
-        this->mDevice->destroy(
-          *this->mPipelineCache,
-          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
-        this->mPipelineCache = nullptr;
-    }
-
     if (this->mFreePipelineLayout && this->mPipelineLayout) {
         KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
         if (!this->mPipelineLayout) {
@@ -317,16 +305,6 @@ Algorithm::createPipeline()
       "main",
       &specializationInfo);
 
-    static std::shared_ptr<vk::PipelineCache> globalPipelineCache = std::make_shared<vk::PipelineCache>();
-    if(!*globalPipelineCache) {
-       vk::PipelineCacheCreateInfo pipelineCacheInfo =
-         vk::PipelineCacheCreateInfo();
-      this->mPipelineCache = globalPipelineCache;
-      this->mFreePipelineCache = true;
-      this->mDevice->createPipelineCache(
-        &pipelineCacheInfo, nullptr, globalPipelineCache.get());
-    }
-
     vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
                                                shaderStage,
                                                *this->mPipelineLayout,
@@ -335,7 +313,7 @@ Algorithm::createPipeline()
 
 #ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
     vk::ResultValue<vk::Pipeline> pipelineResult =
-      this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo);
+      this->mDevice->createComputePipeline(mPipelineCache ? *mPipelineCache : vk::PipelineCache{}, pipelineInfo); // a null cache pointer falls back to a null handle instead of being dereferenced
 
     if (pipelineResult.result != vk::Result::eSuccess) {
         throw std::runtime_error("Failed to create pipeline result: " +
@@ -347,7 +325,7 @@ Algorithm::createPipeline()
     this->mFreePipeline = true;
 #else
     vk::Pipeline pipeline =
-      this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo)
+      this->mDevice->createComputePipeline(mPipelineCache ? *mPipelineCache : vk::PipelineCache{}, pipelineInfo) // a null cache pointer falls back to a null handle instead of being dereferenced
         .value;
     this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
     this->mFreePipeline = true;
diff --git a/kompute/src/Manager.cpp b/kompute/src/Manager.cpp
index 2c86b6e10..2a02b7b10 100644
--- a/kompute/src/Manager.cpp
+++ b/kompute/src/Manager.cpp
@@ -88,15 +88,14 @@ Manager::destroy()
         this->mManagedSequences.clear();
     }
 
-    if (this->mManageResources && this->mManagedAlgorithms.size()) {
+    if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) {
         KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
-        for (const std::weak_ptr<Algorithm>& weakAlgorithm :
-             this->mManagedAlgorithms) {
-            if (std::shared_ptr<Algorithm> algorithm = weakAlgorithm.lock()) {
+        for (const auto& kv : this->mManagedAlgorithmsMap) {
+            if (std::shared_ptr<Algorithm> algorithm = kv.second) {
                 algorithm->destroy();
             }
         }
-        this->mManagedAlgorithms.clear();
+        this->mManagedAlgorithmsMap.clear();
     }
 
     if (this->mManageResources && this->mManagedTensors.size()) {
@@ -109,6 +108,18 @@ Manager::destroy()
         this->mManagedTensors.clear();
     }
 
+    if (this->mPipelineCache) {
+        // The pipeline cache is created in createDevice() against mDevice
+        // and handed to every managed Algorithm, so release it only after
+        // the algorithms above are destroyed and before the device
+        // teardown that follows.
+        KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache");
+        this->mDevice->destroy(
+          *this->mPipelineCache,
+          (vk::Optional<const vk::AllocationCallbacks>)nullptr);
+        this->mPipelineCache = nullptr;
+    }
+
     if (this->mFreeDevice) {
         KP_LOG_INFO("Destroying device");
         this->mDevice->destroy(
@@ -269,12 +280,14 @@ Manager::clear()
                          end(this->mManagedTensors),
                          [](std::weak_ptr<Tensor> t) { return t.expired(); }),
           end(this->mManagedTensors));
-        this->mManagedAlgorithms.erase(
-          std::remove_if(
-            begin(this->mManagedAlgorithms),
-            end(this->mManagedAlgorithms),
-            [](std::weak_ptr<Algorithm> t) { return t.expired(); }),
-          end(this->mManagedAlgorithms));
+        for (auto it = this->mManagedAlgorithmsMap.begin();
+             it != this->mManagedAlgorithmsMap.end();) {
+            if (it->second) { // NOTE(review): true for every live entry, so this drops all cached algorithms; the replaced code only removed expired ones - confirm intent
+                it = this->mManagedAlgorithmsMap.erase(it);
+            } else {
+                ++it;
+            }
+        }
         this->mManagedSequences.erase(
           std::remove_if(begin(this->mManagedSequences),
                          end(this->mManagedSequences),
@@ -452,6 +465,12 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
     }
 
     KP_LOG_DEBUG("Kompute Manager compute queue obtained");
+
+    // Pipeline cache for the device just created; algorithm() hands it to each Algorithm it builds.
+    mPipelineCache = std::make_shared<vk::PipelineCache>();
+    vk::PipelineCacheCreateInfo pipelineCacheInfo{};
+    if (this->mDevice->createPipelineCache(&pipelineCacheInfo, nullptr, mPipelineCache.get()) != vk::Result::eSuccess)
+        KP_LOG_WARN("Kompute Manager failed to create pipeline cache");
 }
 
 std::shared_ptr<Sequence>
diff --git a/kompute/src/include/kompute/Algorithm.hpp b/kompute/src/include/kompute/Algorithm.hpp
index 90fe48fef..ef11234ee 100644
--- a/kompute/src/include/kompute/Algorithm.hpp
+++ b/kompute/src/include/kompute/Algorithm.hpp
@@ -45,6 +45,7 @@ class Algorithm
      */
     template<typename S = float, typename P = float>
     Algorithm(std::shared_ptr<vk::Device> device,
+              vk::PipelineCache *pipelineCache,
               vk::DescriptorPool *pool,
               const std::vector<std::shared_ptr<Tensor>>& tensors = {},
               const std::vector<uint32_t>& spirv = {},
@@ -55,6 +56,7 @@ class Algorithm
         KP_LOG_DEBUG("Kompute Algorithm Constructor with device");
 
         this->mDevice = device;
+        this->mPipelineCache = pipelineCache;
         this->mDescriptorPool = pool;
 
         if (tensors.size() && spirv.size()) {
@@ -310,8 +312,7 @@ class Algorithm
     bool mFreeShaderModule = false;
     std::shared_ptr<vk::PipelineLayout> mPipelineLayout;
     bool mFreePipelineLayout = false;
-    std::shared_ptr<vk::PipelineCache> mPipelineCache;
-    bool mFreePipelineCache = false;
+    vk::PipelineCache *mPipelineCache = nullptr;
     std::shared_ptr<vk::Pipeline> mPipeline;
     bool mFreePipeline = false;
 
diff --git a/kompute/src/include/kompute/Manager.hpp b/kompute/src/include/kompute/Manager.hpp
index 42336f4e8..e910b2b81 100644
--- a/kompute/src/include/kompute/Manager.hpp
+++ b/kompute/src/include/kompute/Manager.hpp
@@ -39,6 +39,10 @@ class Manager
      */
     ~Manager();
 
+    /** Reports whether mInstance is currently set
+     *  (the same kind of check hasDevice() performs on mDevice). */
+    bool hasInstance() const { return this->mInstance != nullptr; }
+
     bool hasDevice() const {
         return this->mDevice.get();
     }
@@ -149,6 +153,7 @@ class Manager
      * @returns Shared pointer with initialised algorithm
      */
     std::shared_ptr<Algorithm> algorithm(
+      const std::string &name,
       vk::DescriptorPool *pool,
       const std::vector<std::shared_ptr<Tensor>>& tensors = {},
       const std::vector<uint32_t>& spirv = {},
@@ -157,7 +162,7 @@ class Manager
       const std::vector<float>& pushConstants = {})
     {
         return this->algorithm<>(
-          pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
+          name, pool, tensors, spirv, workgroup, specializationConstants, pushConstants);
     }
 
     /**
@@ -176,6 +181,7 @@ class Manager
      */
     template<typename S = float, typename P = float>
     std::shared_ptr<Algorithm> algorithm(
+      const std::string &name,
       vk::DescriptorPool *pool,
       const std::vector<std::shared_ptr<Tensor>>& tensors,
       const std::vector<uint32_t>& spirv,
@@ -188,6 +194,7 @@ class Manager
 
         std::shared_ptr<Algorithm> algorithm{ new kp::Algorithm(
           this->mDevice,
+          mPipelineCache.get(),
           pool,
           tensors,
           spirv,
@@ -196,12 +203,24 @@ class Manager
           pushConstants) };
 
         if (this->mManageResources) {
-            this->mManagedAlgorithms.push_back(algorithm);
+            this->mManagedAlgorithmsMap.insert({name, algorithm});
         }
 
         return algorithm;
     }
 
+    /** Tells whether an algorithm has been stored under `name`
+     *  (algorithm() only stores entries when mManageResources is set). */
+    bool hasAlgorithm(const std::string &name) const { return mManagedAlgorithmsMap.count(name) != 0; }
+
+    /** Looks up the algorithm stored under `name`.
+     *  @returns the stored shared pointer, or nullptr when `name` is unknown. */
+    std::shared_ptr<Algorithm> getAlgorithm(const std::string &name) const {
+        const auto found = mManagedAlgorithmsMap.find(name);
+        if (found == mManagedAlgorithmsMap.end()) { return nullptr; }
+        return found->second;
+    }
+
     /**
      * Destroy the GPU resources and all managed resources by manager.
      **/
@@ -237,6 +256,7 @@ class Manager
 
     std::shared_ptr<vk::Device> device() const { return mDevice; }
     std::shared_ptr<vk::PhysicalDevice> physicalDevice() const { return mPhysicalDevice; }
+    std::shared_ptr<vk::PipelineCache> pipelineCache() const { return mPipelineCache; }
 
   private:
     // -------------- OPTIONALLY OWNED RESOURCES
@@ -250,10 +270,11 @@ class Manager
     // -------------- ALWAYS OWNED RESOURCES
     std::vector<std::weak_ptr<Tensor>> mManagedTensors;
     std::vector<std::weak_ptr<Sequence>> mManagedSequences;
-    std::vector<std::weak_ptr<Algorithm>> mManagedAlgorithms;
+    std::unordered_map<std::string, std::shared_ptr<Algorithm>> mManagedAlgorithmsMap;
 
     std::vector<uint32_t> mComputeQueueFamilyIndices;
     std::vector<std::shared_ptr<vk::Queue>> mComputeQueues;
+    std::shared_ptr<vk::PipelineCache> mPipelineCache;
 
     bool mManageResources = false;