diff --git a/ggml-mpi.c b/ggml-mpi.c index 6dd7e7b76..4bde41808 100644 --- a/ggml-mpi.c +++ b/ggml-mpi.c @@ -6,7 +6,6 @@ #include #include -#include #define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -168,7 +167,8 @@ void ggml_mpi_graph_compute( return; } - // attach the input data to the first layer for this node + // attach the input data to all nodes that need it + // TODO: not great - should be able to do this without modifying the compute graph (see next TODO below) for (int i = idx_l0; i < idx_l1; i++) { if (gf->nodes[i]->src0 == gf->nodes[idx_l0]) { gf->nodes[i]->src0 = inp0; diff --git a/llama.cpp b/llama.cpp index b7aad4c6e..8c2d0ea4b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1342,6 +1342,10 @@ static bool llama_eval_internal( inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens); } else { +#ifdef GGML_USE_MPI + GGML_ASSERT(false && "not implemented"); +#endif + inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N); memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL)); }