From 1bf681f90ef4cf37b36e6d604d3e30fc57eda650 Mon Sep 17 00:00:00 2001 From: Finn Voorhees Date: Wed, 3 Jan 2024 08:39:43 -0500 Subject: [PATCH] ggml : add error handling to graph_compute (whisper/1714) --- ggml-backend-impl.h | 2 +- ggml-backend.c | 10 +++++++--- ggml-backend.h | 2 +- ggml-cuda.cu | 4 +++- ggml-metal.h | 2 +- ggml-metal.m | 9 +++++---- 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/ggml-backend-impl.h b/ggml-backend-impl.h index 05859935a..ca21b4743 100644 --- a/ggml-backend-impl.h +++ b/ggml-backend-impl.h @@ -90,7 +90,7 @@ extern "C" { void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); // compute graph without a plan - void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph); + bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph); // check if the backend supports an operation bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); diff --git a/ggml-backend.c b/ggml-backend.c index 2c3752067..53e741cb8 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_ ggml_backend_synchronize(backend); } -void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { - backend->iface.graph_compute(backend, cgraph); +bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { + if (!backend->iface.graph_compute(backend, cgraph)) { + return false; + } // TODO: optional sync ggml_backend_synchronize(backend); + return true; } bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { @@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac GGML_UNUSED(backend); } -static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { +static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context; struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads); @@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c cplan.work_data = cpu_ctx->work_data; ggml_graph_compute(cgraph, &cplan); + return true; } static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { diff --git a/ggml-backend.h b/ggml-backend.h index a9d2fddd7..85ff67b0e 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -58,7 +58,7 @@ extern "C" { GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan); GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan); - GGML_API void ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); + GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op); // tensor copy between different backends diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 52d3cc6a6..10c21615e 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -9910,7 +9910,7 @@ static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_ba UNUSED(plan); } -static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { +static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context; ggml_cuda_set_main_device(cuda_ctx->device); @@ -9967,6 +9967,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph } UNUSED(backend); + + return true; } static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) { diff --git a/ggml-metal.h b/ggml-metal.h index b5e02b668..c4b7325da 100644 --- a/ggml-metal.h +++ b/ggml-metal.h @@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx); // same as ggml_graph_compute but uses Metal // creates gf->n_threads command buffers in parallel -void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf); +bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf); // // backend API diff --git a/ggml-metal.m b/ggml-metal.m index 7aa92c14c..55cc1a872 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -977,7 +977,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) { return false; } } -void ggml_metal_graph_compute( +bool ggml_metal_graph_compute( struct ggml_metal_context * ctx, struct ggml_cgraph * gf) { @autoreleasepool { @@ -2405,10 +2405,11 @@ void ggml_metal_graph_compute( MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status]; if (status != MTLCommandBufferStatusCompleted) { GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status); - GGML_ASSERT(false); + return false; } } + return true; } } @@ -2688,10 +2689,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm UNUSED(backend); } -static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { +static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context; - ggml_metal_graph_compute(metal_ctx, cgraph); + return ggml_metal_graph_compute(metal_ctx, cgraph); } static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {