mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 21:37:19 +01:00
metal : free metal objects (#5161)
* Releasing MTLFunction references after Metal pipeline construction
* Keeping the `ggml_metal_kernel` structure
* Spacing fix
* Whitespace fix
This commit is contained in:
parent
35dec26cc2
commit
d2f650cb5b
31
ggml-metal.m
31
ggml-metal.m
@ -24,10 +24,7 @@
|
|||||||
|
|
||||||
#define UNUSED(x) (void)(x)
|
#define UNUSED(x) (void)(x)
|
||||||
|
|
||||||
#define GGML_METAL_MAX_KERNELS 256
|
|
||||||
|
|
||||||
struct ggml_metal_kernel {
|
struct ggml_metal_kernel {
|
||||||
id<MTLFunction> function;
|
|
||||||
id<MTLComputePipelineState> pipeline;
|
id<MTLComputePipelineState> pipeline;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -159,11 +156,10 @@ struct ggml_metal_context {
|
|||||||
|
|
||||||
id<MTLDevice> device;
|
id<MTLDevice> device;
|
||||||
id<MTLCommandQueue> queue;
|
id<MTLCommandQueue> queue;
|
||||||
id<MTLLibrary> library;
|
|
||||||
|
|
||||||
dispatch_queue_t d_queue;
|
dispatch_queue_t d_queue;
|
||||||
|
|
||||||
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
|
struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
|
||||||
|
|
||||||
bool support_simdgroup_reduction;
|
bool support_simdgroup_reduction;
|
||||||
bool support_simdgroup_mm;
|
bool support_simdgroup_mm;
|
||||||
@ -246,6 +242,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
ctx->queue = [ctx->device newCommandQueue];
|
ctx->queue = [ctx->device newCommandQueue];
|
||||||
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
||||||
|
|
||||||
|
id<MTLLibrary> metal_library;
|
||||||
|
|
||||||
// load library
|
// load library
|
||||||
{
|
{
|
||||||
NSBundle * bundle = nil;
|
NSBundle * bundle = nil;
|
||||||
@ -260,7 +258,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
// pre-compiled library found
|
// pre-compiled library found
|
||||||
NSURL * libURL = [NSURL fileURLWithPath:libPath];
|
NSURL * libURL = [NSURL fileURLWithPath:libPath];
|
||||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
|
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
|
||||||
ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
|
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
|
||||||
if (error) {
|
if (error) {
|
||||||
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -302,7 +300,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
|
|
||||||
//[options setFastMathEnabled:false];
|
//[options setFastMathEnabled:false];
|
||||||
|
|
||||||
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
|
metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
|
||||||
if (error) {
|
if (error) {
|
||||||
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -367,8 +365,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
{
|
{
|
||||||
NSError * error = nil;
|
NSError * error = nil;
|
||||||
|
|
||||||
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
|
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
|
||||||
ctx->kernels[i].function = nil;
|
|
||||||
ctx->kernels[i].pipeline = nil;
|
ctx->kernels[i].pipeline = nil;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -380,10 +377,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
#define GGML_METAL_ADD_KERNEL(e, name, supported) \
|
#define GGML_METAL_ADD_KERNEL(e, name, supported) \
|
||||||
if (supported) { \
|
if (supported) { \
|
||||||
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
|
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
|
||||||
kernel->function = [ctx->library newFunctionWithName:@"kernel_"#name]; \
|
id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
|
||||||
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \
|
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
|
||||||
|
[metal_function release]; \
|
||||||
if (error) { \
|
if (error) { \
|
||||||
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
|
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
|
||||||
|
[metal_library release]; \
|
||||||
return NULL; \
|
return NULL; \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
@ -512,23 +511,17 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|||||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
|
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[metal_library release];
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_metal_free(struct ggml_metal_context * ctx) {
|
static void ggml_metal_free(struct ggml_metal_context * ctx) {
|
||||||
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
|
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
|
||||||
|
|
||||||
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
|
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
|
||||||
if (ctx->kernels[i].pipeline) {
|
|
||||||
[ctx->kernels[i].pipeline release];
|
[ctx->kernels[i].pipeline release];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->kernels[i].function) {
|
|
||||||
[ctx->kernels[i].function release];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[ctx->library release];
|
|
||||||
[ctx->queue release];
|
[ctx->queue release];
|
||||||
[ctx->device release];
|
[ctx->device release];
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user