rpc : fix nkvo

This commit is contained in:
slaren 2024-09-07 03:24:47 +02:00
parent 9b2c24c099
commit c3e2bb6dcf
3 changed files with 16 additions and 10 deletions

View File

@ -2544,7 +2544,11 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
for (int i = 0; i < cgraph->n_nodes; i++) { for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i]; ggml_tensor * node = cgraph->nodes[i];
if (node->src[0] && ggml_backend_buffer_is_cuda_split(node->src[0]->buffer)) { if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
continue;
}
if (node->src[0] && node->src[0]->buffer && ggml_backend_buffer_is_cuda_split(node->src[0]->buffer)) {
use_cuda_graph = false; // Split buffers are not supported by CUDA graph capture use_cuda_graph = false; // Split buffers are not supported by CUDA graph capture
#ifndef NDEBUG #ifndef NDEBUG
GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to split buffer\n", __func__); GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to split buffer\n", __func__);

View File

@ -883,15 +883,17 @@ ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rp
} }
result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer); result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer);
if (result->buffer && buffers.find(result->buffer) == buffers.end()) { if (result->buffer && buffers.find(result->buffer) == buffers.end()) {
return nullptr; result->buffer = nullptr;
} }
if (result->buffer) {
// require that the tensor data does not go beyond the buffer end // require that the tensor data does not go beyond the buffer end
uint64_t tensor_size = (uint64_t) ggml_nbytes(result); uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer); uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer); uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow
GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size); GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size);
}
result->op = (ggml_op) tensor->op; result->op = (ggml_op) tensor->op;
for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) { for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
@ -1060,7 +1062,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<u
const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors)); const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors));
GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors); GGML_PRINT_DEBUG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors);
static size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); static size_t buf_size = ggml_tensor_overhead()*(n_nodes*20 + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
struct ggml_init_params params = { struct ggml_init_params params = {
/*.mem_size =*/ buf_size, /*.mem_size =*/ buf_size,
/*.mem_buffer =*/ NULL, /*.mem_buffer =*/ NULL,

View File

@ -3847,7 +3847,7 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) { if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n", GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
__func__, cur_end + size_needed, ctx->mem_size); __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
assert(false); assert(false);
return NULL; return NULL;
} }