- fix copy_tensor being called on the src buffer instead of the dst buffer

- always initialize views in the view_src buffer

- add RPC backend to Makefile build

- add endpoint to all RPC object names
This commit is contained in:
slaren 2024-05-31 17:05:14 +02:00
parent 6c276deb9d
commit a7060dffdd
5 changed files with 24 additions and 14 deletions

View File

@ -416,6 +416,12 @@ ifdef LLAMA_BLIS
MK_LDFLAGS += -lblis -L/usr/local/lib MK_LDFLAGS += -lblis -L/usr/local/lib
endif # LLAMA_BLIS endif # LLAMA_BLIS
ifdef LLAMA_RPC
MK_CPPFLAGS += -DGGML_USE_RPC
OBJS += ggml-rpc.o
endif # LLAMA_RPC
ifdef LLAMA_CUBLAS ifdef LLAMA_CUBLAS
# LLAMA_CUBLAS is deprecated and will be removed in the future # LLAMA_CUBLAS is deprecated and will be removed in the future
LLAMA_CUDA := 1 LLAMA_CUDA := 1
@ -631,6 +637,12 @@ sgemm.o: sgemm.cpp sgemm.h ggml.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
endif endif
ifdef LLAMA_RPC
ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
$(CXX) $(CXXFLAGS) -c $< -o $@
endif # LLAMA_RPC
GF_CC := $(CC) GF_CC := $(CC)
include scripts/get-flags.mk include scripts/get-flags.mk

View File

@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
// this tensor was allocated without ggml-backend // this tensor was allocated without ggml-backend
return; return;
} }
ggml_backend_view_init(galloc->buffers[buffer_id], tensor); ggml_backend_view_init(tensor);
} }
} else { } else {
if (tensor->data == NULL) { if (tensor->data == NULL) {
@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
if (t->view_src == NULL) { if (t->view_src == NULL) {
ggml_tallocr_alloc(&tallocr, t); ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) { } else if (t->buffer == NULL) {
ggml_backend_view_init(buffer, t); ggml_backend_view_init(t);
} }
} else { } else {
if (t->view_src != NULL && t->buffer == NULL) { if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor // view of a pre-allocated tensor
ggml_backend_view_init(buffer, t); ggml_backend_view_init(t);
} }
} }
} }

View File

@ -151,7 +151,7 @@ void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) {
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) { bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) {
ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer; ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer;
if (dst_buf->iface.cpy_tensor) { if (dst_buf->iface.cpy_tensor) {
return src->buffer->iface.cpy_tensor(dst_buf, src, dst); return dst_buf->iface.cpy_tensor(dst_buf, src, dst);
} }
return false; return false;
} }
@ -321,9 +321,7 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst
ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src)); ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src));
} else if (ggml_backend_buffer_is_host(dst->buffer)) { } else if (ggml_backend_buffer_is_host(dst->buffer)) {
ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src)); ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
} } else if (!ggml_backend_buffer_copy_tensor(src, dst)) {
bool same_backend = strcmp(ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer)) == 0;
if (!same_backend || !ggml_backend_buffer_copy_tensor(src, dst)) {
#ifndef NDEBUG #ifndef NDEBUG
fprintf(stderr, "%s: warning: slow copy from %s to %s\n", __func__, ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer)); fprintf(stderr, "%s: warning: slow copy from %s to %s\n", __func__, ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer));
#endif #endif
@ -1889,15 +1887,15 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
// utils // utils
void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { void ggml_backend_view_init(struct ggml_tensor * tensor) {
GGML_ASSERT(tensor->buffer == NULL); GGML_ASSERT(tensor->buffer == NULL);
GGML_ASSERT(tensor->view_src != NULL); GGML_ASSERT(tensor->view_src != NULL);
GGML_ASSERT(tensor->view_src->buffer != NULL); GGML_ASSERT(tensor->view_src->buffer != NULL);
GGML_ASSERT(tensor->view_src->data != NULL); GGML_ASSERT(tensor->view_src->data != NULL);
tensor->buffer = buffer; tensor->buffer = tensor->view_src->buffer;
tensor->data = (char *)tensor->view_src->data + tensor->view_offs; tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
ggml_backend_buffer_init_tensor(buffer, tensor); ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
} }
void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) { void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
@ -1956,7 +1954,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
struct ggml_tensor * dst = node_copies[id]; struct ggml_tensor * dst = node_copies[id];
if (dst->view_src != NULL) { if (dst->view_src != NULL) {
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src); graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
ggml_backend_view_init(dst->view_src->buffer, dst); ggml_backend_view_init(dst);
} }
else { else {
ggml_backend_tensor_copy(src, dst); ggml_backend_tensor_copy(src, dst);

View File

@ -225,7 +225,7 @@ extern "C" {
// Tensor initialization // Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -491,7 +491,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer
if (remote_ptr != 0) { if (remote_ptr != 0) {
ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft, ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
ggml_backend_rpc_buffer_interface, ggml_backend_rpc_buffer_interface,
new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC"}, new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC[" + std::string(buft_ctx->endpoint) + "]"},
remote_size); remote_size);
return buffer; return buffer;
} else { } else {
@ -692,7 +692,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const
GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) { GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) {
ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context { ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
/* .endpoint = */ endpoint, /* .endpoint = */ endpoint,
/* .name = */ "RPC", /* .name = */ "RPC[" + std::string(endpoint) + "]",
}; };
ggml_backend_t backend = new ggml_backend { ggml_backend_t backend = new ggml_backend {