metal : free metal objects (#5161)

* Releasing MTLFunction references after Metal pipeline construction
* Keeping the `ggml_metal_kernel` structure
* Spacing fix
* Whitespace fix
2024-12-25 05:48:47 +01:00 · 2024-01-28 19:50:16 +00:00 · 2024-01-28 19:50:16 +00:00 · d2f650cb5b
commit d2f650cb5b
parent 35dec26cc2
1 changed file with 13 additions and 20 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -24,10 +24,7 @@

 #define UNUSED(x) (void)(x)

-#define GGML_METAL_MAX_KERNELS 256
-
 struct ggml_metal_kernel {
-    id<MTLFunction>             function;
    id<MTLComputePipelineState> pipeline;
 };

@@ -159,11 +156,10 @@ struct ggml_metal_context {

    id<MTLDevice>       device;
    id<MTLCommandQueue> queue;
-    id<MTLLibrary>      library;

    dispatch_queue_t d_queue;

-    struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
+    struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];

    bool support_simdgroup_reduction;
    bool support_simdgroup_mm;
@@ -246,6 +242,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
    ctx->queue  = [ctx->device newCommandQueue];
    ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);

+    id<MTLLibrary> metal_library;
+
    // load library
    {
        NSBundle * bundle = nil;
@@ -260,7 +258,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
            // pre-compiled library found
            NSURL * libURL = [NSURL fileURLWithPath:libPath];
            GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
-            ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
+            metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
            if (error) {
                GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
                return NULL;
@@ -302,7 +300,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {

                //[options setFastMathEnabled:false];

-                ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
+                metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
                if (error) {
                    GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
                    return NULL;
@@ -367,8 +365,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
    {
        NSError * error = nil;

-        for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
-            ctx->kernels[i].function = nil;
+        for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
            ctx->kernels[i].pipeline = nil;
        }

@@ -380,10 +377,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #define GGML_METAL_ADD_KERNEL(e, name, supported) \
        if (supported) { \
            struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
-            kernel->function = [ctx->library newFunctionWithName:@"kernel_"#name]; \
-            kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \
+            id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
+            kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
+            [metal_function release]; \
            if (error) { \
                GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+                [metal_library release]; \
                return NULL; \
            } \
        } else { \
@@ -512,23 +511,17 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS,                  sum_rows,               true);
    }

+    [metal_library release];
    return ctx;
 }

 static void ggml_metal_free(struct ggml_metal_context * ctx) {
    GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);

-    for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
-        if (ctx->kernels[i].pipeline) {
+    for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
        [ctx->kernels[i].pipeline release];
    }

-        if (ctx->kernels[i].function) {
-            [ctx->kernels[i].function release];
-        }
-    }
-
-    [ctx->library release];
    [ctx->queue release];
    [ctx->device release];