diff --git a/Makefile b/Makefile
index f3bda7b9f..163971525 100644
--- a/Makefile
+++ b/Makefile
@@ -150,8 +150,8 @@ ifndef LLAMA_NO_ACCELERATE
 endif # LLAMA_NO_ACCELERATE
 
 ifdef LLAMA_MPI
-    CFLAGS += -DGGML_USE_MPI
-    CXXFLAGS += -DGGML_USE_MPI
+    CFLAGS += -DGGML_USE_MPI -Wno-cast-qual -Wno-int-to-void-pointer-cast -Wno-void-pointer-to-int-cast
+    CXXFLAGS += -DGGML_USE_MPI -Wno-cast-qual
 endif # LLAMA_MPI
 
 ifdef LLAMA_OPENBLAS
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 2d913cebb..57a0fb7c5 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -173,6 +173,8 @@ int main(int argc, char ** argv)
     llama_free( ctx );
     llama_free_model( model );
 
+    llama_finalize_backend();
+
     return 0;
 }
diff --git a/llama.cpp b/llama.cpp
index c7de0bc60..a4435897e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1581,17 +1581,17 @@ static bool llama_eval_internal(
     // used at the end to optionally extract the embeddings
     struct ggml_tensor * embeddings = NULL;
 
-#ifdef GGML_USE_MPI
-    cur = ggml_send_tensor(ctx0, cur, (lctx.mpi_rank+1)%lctx.mpi_size);
-    ggml_set_name(cur, "send");
-#endif
+    if (lctx.mpi_size > 1) {
+        cur = ggml_send_tensor(ctx0, cur, (lctx.mpi_rank+1)%lctx.mpi_size);
+        ggml_set_name(cur, "send");
+    }
 
     if (lctx.mpi_rank == 0) {
-#ifdef GGML_USE_MPI
-        cur = ggml_recv_tensor(ctx0, cur,
-            ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N),
-            lctx.mpi_size-1);
-        ggml_set_name(cur, "recv");
-#endif
+        if (lctx.mpi_size > 1) {
+            cur = ggml_recv_tensor(ctx0, cur,
+                ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N),
+                lctx.mpi_size-1);
+            ggml_set_name(cur, "recv");
+        }
 
         // norm
         {
             cur = ggml_rms_norm(ctx0, cur);
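
For context, below is a minimal standalone sketch of the ring communication pattern the patch wires into the ggml graph: every rank forwards its activations to (rank + 1) % size, and rank 0 collects the final activations from rank size - 1, mirroring the ggml_send_tensor / ggml_recv_tensor calls above. Everything other than the raw MPI calls is a hypothetical stand-in (the N_EMBD width and the toy per-rank "compute" step are assumptions, not llama.cpp code):

#include <mpi.h>
#include <stdio.h>

#define N_EMBD 8  /* toy embedding width; an assumption for this sketch */

int main(int argc, char ** argv) {
    MPI_Init(&argc, &argv);

    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    float cur[N_EMBD] = { 1.0f };  /* stand-in for the activation tensor */

    if (size > 1) {  /* same guard as lctx.mpi_size > 1 in the patch */
        if (rank != 0) {
            /* every stage but the first waits for the previous stage's
             * activations (the recv half of the ring) */
            MPI_Recv(cur, N_EMBD, MPI_FLOAT, rank - 1, 0, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
        }

        /* each rank would evaluate its share of the transformer layers
         * here; a toy update stands in for that work */
        for (int i = 0; i < N_EMBD; i++) {
            cur[i] += (float) rank;
        }

        /* forward to the next stage; the last rank wraps around to rank 0,
         * matching (lctx.mpi_rank+1)%lctx.mpi_size above */
        MPI_Send(cur, N_EMBD, MPI_FLOAT, (rank + 1) % size, 0, MPI_COMM_WORLD);

        if (rank == 0) {
            /* the head node collects the final activations from the last
             * stage, matching ggml_recv_tensor(..., lctx.mpi_size-1) */
            MPI_Recv(cur, N_EMBD, MPI_FLOAT, size - 1, 0, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            printf("rank 0 received final activation[0] = %f\n", cur[0]);
        }
    }

    /* analogue of the llama_finalize_backend() call added to simple.cpp */
    MPI_Finalize();
    return 0;
}

Built with mpicc and run under e.g. mpirun -np 4, rank 0 prints the value after the activations have passed through every stage. Note the ordering: rank 0 sends before it receives, and every other rank receives before it sends, so the blocking calls cannot deadlock.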