mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 11:23:56 +01:00
mpi : minor
This commit is contained in:
parent
beadbf3380
commit
9da9d26c70
@ -6,7 +6,6 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Yields the smaller of x and y; when x is not less than y, the result is y.
// Function-like macro: the selected argument is expanded (and evaluated) twice,
// so do not pass expressions with side effects such as MIN(i++, j).
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
|
||||
@ -168,7 +167,8 @@ void ggml_mpi_graph_compute(
|
||||
return;
|
||||
}
|
||||
|
||||
// attach the input data to the first layer for this node
|
||||
// attach the input data to all nodes that need it
|
||||
// TODO: not great - should be able to do this without modifying the compute graph (see next TODO below)
|
||||
for (int i = idx_l0; i < idx_l1; i++) {
|
||||
if (gf->nodes[i]->src0 == gf->nodes[idx_l0]) {
|
||||
gf->nodes[i]->src0 = inp0;
|
||||
|
@ -1342,6 +1342,10 @@ static bool llama_eval_internal(
|
||||
|
||||
inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
|
||||
} else {
|
||||
#ifdef GGML_USE_MPI
|
||||
GGML_ASSERT(false && "not implemented");
|
||||
#endif
|
||||
|
||||
inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
|
||||
memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user