From ef37dd14e744b6323f95196b00b88f29512de697 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 9 Jul 2023 17:01:08 +0300
Subject: [PATCH] mpi : fix output tensor after MPI compute (still not working)

---
 ggml-mpi.c | 11 ++++++++---
 llama.cpp | 2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ggml-mpi.c b/ggml-mpi.c
index e890d24d1..46ee5bacb 100644
--- a/ggml-mpi.c
+++ b/ggml-mpi.c
@@ -57,7 +57,7 @@ void ggml_mpi_eval_init(
     MPI_Bcast(n_threads, 1, MPI_INT, 0, MPI_COMM_WORLD);
 }
 
-int ggml_graph_get_node_idx( struct ggml_cgraph * gf, const char * name) {
+int ggml_graph_get_node_idx(struct ggml_cgraph * gf, const char * name) {
     struct ggml_tensor * t = ggml_graph_get_tensor(gf, name);
     if (t == NULL) {
         fprintf(stderr, "%s: tensor %s not found\n", __func__, name);
@@ -141,8 +141,8 @@ void ggml_mpi_graph_compute(
     const int il0 = (mpi_idx + 0) * n_per_node;
     const int il1 = MIN(n_layers, (mpi_idx + 1) * n_per_node);
 
-    char name_l0[64];
-    char name_l1[64];
+    char name_l0[GGML_MAX_NAME];
+    char name_l1[GGML_MAX_NAME];
 
     snprintf(name_l0, sizeof(name_l0), "layer_inp_%d", il0);
     snprintf(name_l1, sizeof(name_l1), "layer_inp_%d", il1);
@@ -175,6 +175,11 @@ void ggml_mpi_graph_compute(
 
     ggml_graph_compute(ctx, gf);
 
+    //if (mpi_rank == 0) {
+    //    ggml_graph_print(gf);
+    //    ggml_graph_dump_dot(gf, NULL, "llama.dot");
+    //}
+
     //fprintf(stderr, "%s: node %d: done\n", __func__, mpi_rank);
 
     // send the output data to the next node
diff --git a/llama.cpp b/llama.cpp
index 08a5bd284..4bf1e75d2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1637,6 +1637,8 @@ static bool llama_eval_internal(
     }
 #elif GGML_USE_MPI
     ggml_mpi_graph_compute(lctx.ctx_mpi, ctx0, &gf, n_layer);
+
+    cur = gf.nodes[gf.n_nodes - 1];
 #else
     ggml_graph_compute(ctx0, &gf);
 #endif