From 83f3d7a83c6eb9691db3f55477cccb3c9fd1cbab Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 15 Jan 2024 15:52:41 +0200 Subject: [PATCH] backend : clean-up the implementation ggml-ci --- examples/simple/simple.cpp | 11 ++++++----- ggml-backend.c | 27 +++++++++++++++------------ ggml-backend.h | 4 +--- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index dac7aa60a..ce3497345 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -8,19 +8,20 @@ // a function that can be called for every computed node during graph evaluation // the user can choose to whether to observe the data of the node depending on the tensor parameters -static bool observe_compute(int node_index, struct ggml_tensor * t, bool ask, void * user_data) { +static bool observe_compute(struct ggml_tensor * t, bool ask, void * user_data) { GGML_UNUSED(user_data); // the scheduler is asking us if we want to observe this node if (ask) { - // check if name contains soft_max + // check if name contains soft_max (customize to your needs) return strstr(t->name, "soft_max") != 0; } - // print the node data - printf("%s: node_index = %5d, t->name = %32s, t->op = %12s, [%5d, %5d, %5d, %5d]\n", - __func__, node_index, t->name, ggml_op_name(t->op), (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]); + // print the node info + printf("%s: t->name = %32s, t->op = %12s, [%5d, %5d, %5d, %5d]\n", + __func__, t->name, ggml_op_name(t->op), (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]); + // this will copy the data to host memory (if needed) std::vector t_data(ggml_nelements(t)); ggml_backend_tensor_get(t, t_data.data(), 0, ggml_nbytes(t)); diff --git a/ggml-backend.c b/ggml-backend.c index 0ec46ed32..07482bedf 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -1334,28 +1334,31 @@ static void sched_compute_splits(ggml_backend_sched_t sched) { //ggml_backend_synchronize(split_backend); // necessary to measure compute time } else { // similar to ggml_backend_compare_graph_backend - for (int j = 0; j < split->graph.n_nodes; j++) { - struct ggml_tensor * t = split->graph.nodes[j]; + for (int j0 = 0; j0 < split->graph.n_nodes; j0++) { + struct ggml_tensor * t = split->graph.nodes[j0]; - int k = j; + int j1 = j0; - // check if the user needs data from this node - while (!sched->callback_eval(k, t, true, sched->callback_eval_user_data) && k < split->graph.n_nodes - 1) { - t = split->graph.nodes[++k]; + // determine the range [j0, j1] of nodes that can be computed together + while (j1 < split->graph.n_nodes - 1) { + // check if the user needs data from this node + if (sched->callback_eval(t, true, sched->callback_eval_user_data)) { + break; + } + + t = split->graph.nodes[++j1]; } - struct ggml_cgraph gv = ggml_graph_view(&split->graph, j, k + 1); + struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1); ggml_backend_graph_compute(split_backend, &gv); - // TODO: k is node index in the split, not in the original graph - // TODO: avoid the ask == true call here - if (sched->callback_eval(k, t, true, sched->callback_eval_user_data) && - !sched->callback_eval(k, t, false, sched->callback_eval_user_data)) { + if (sched->callback_eval(t, true, sched->callback_eval_user_data) && // ask + !sched->callback_eval(t, false, sched->callback_eval_user_data)) { // eval break; } - j = k; + j0 = j1; } } uint64_t compute_end_us = ggml_time_us(); diff --git a/ggml-backend.h b/ggml-backend.h index 0d4ff69ba..5cef4d8b4 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -154,8 +154,7 @@ extern "C" { // when ask == false, the scheduler is passing the node tensor to the user for observation // if the user returns false, the scheduler will cancel the graph compute // - // TODO: propose to rename to ggml_backend_sched_callback_eval - typedef bool (*ggml_backend_sched_eval_callback)(int node_index, struct ggml_tensor * t, bool ask, void * user_data); + typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); // Initialize a backend scheduler GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size); @@ -195,7 +194,6 @@ extern "C" { GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph); GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy); - // TODO: propose to rename this to ggml_backend_callback_compare typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); // Compare the output of two backends