mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 05:42:22 +01:00
ggml : sync ggml core (minor additions, e.g. ggml_get_tensor_by_name())
This commit is contained in:
parent
66874d4fbc
commit
bdbda1b17a
46
ggml.c
46
ggml.c
@ -3494,7 +3494,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
|
|||||||
};
|
};
|
||||||
static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
|
static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
|
||||||
|
|
||||||
static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
|
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||||
"NONE",
|
"NONE",
|
||||||
|
|
||||||
"DUP",
|
"DUP",
|
||||||
@ -3749,6 +3749,9 @@ const char * ggml_type_name(enum ggml_type type) {
|
|||||||
return GGML_TYPE_NAME[type];
|
return GGML_TYPE_NAME[type];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char * ggml_op_name(enum ggml_op op) {
|
||||||
|
return GGML_OP_NAME[op];
|
||||||
|
}
|
||||||
|
|
||||||
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
||||||
return GGML_TYPE_SIZE[tensor->type];
|
return GGML_TYPE_SIZE[tensor->type];
|
||||||
@ -4017,6 +4020,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
|
||||||
|
ctx->no_alloc = no_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
// IMPORTANT:
|
// IMPORTANT:
|
||||||
// when creating "opt" tensors, always save and load the scratch buffer
|
// when creating "opt" tensors, always save and load the scratch buffer
|
||||||
// this is an error prone process, but it is necessary to support inplace
|
// this is an error prone process, but it is necessary to support inplace
|
||||||
@ -4061,7 +4068,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|||||||
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
||||||
|
|
||||||
if (ctx->scratch.data == NULL || data != NULL) {
|
if (ctx->scratch.data == NULL || data != NULL) {
|
||||||
size_needed += sizeof(struct ggml_tensor);
|
size_needed += GGML_TENSOR_SIZE;
|
||||||
|
|
||||||
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
||||||
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
||||||
@ -4077,14 +4084,15 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
|
if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
|
||||||
GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
|
GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
|
||||||
|
__func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
|
||||||
assert(false);
|
assert(false);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
|
if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
|
||||||
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
||||||
__func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
|
__func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
|
||||||
assert(false);
|
assert(false);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -4093,7 +4101,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|||||||
|
|
||||||
*obj_new = (struct ggml_object) {
|
*obj_new = (struct ggml_object) {
|
||||||
.offs = cur_end + GGML_OBJECT_SIZE,
|
.offs = cur_end + GGML_OBJECT_SIZE,
|
||||||
.size = sizeof(struct ggml_tensor),
|
.size = GGML_TENSOR_SIZE,
|
||||||
.next = NULL,
|
.next = NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -13792,11 +13800,19 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|||||||
// reached a leaf node, not part of the gradient graph (e.g. a constant)
|
// reached a leaf node, not part of the gradient graph (e.g. a constant)
|
||||||
GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
|
GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
|
||||||
|
|
||||||
|
if (strlen(node->name) == 0) {
|
||||||
|
snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
|
||||||
|
}
|
||||||
|
|
||||||
cgraph->leafs[cgraph->n_leafs] = node;
|
cgraph->leafs[cgraph->n_leafs] = node;
|
||||||
cgraph->n_leafs++;
|
cgraph->n_leafs++;
|
||||||
} else {
|
} else {
|
||||||
GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
|
GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
|
||||||
|
|
||||||
|
if (strlen(node->name) == 0) {
|
||||||
|
snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
|
||||||
|
}
|
||||||
|
|
||||||
cgraph->nodes[cgraph->n_nodes] = node;
|
cgraph->nodes[cgraph->n_nodes] = node;
|
||||||
cgraph->grads[cgraph->n_nodes] = node->grad;
|
cgraph->grads[cgraph->n_nodes] = node->grad;
|
||||||
cgraph->n_nodes++;
|
cgraph->n_nodes++;
|
||||||
@ -14510,6 +14526,18 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name) {
|
||||||
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
|
struct ggml_tensor * node = cgraph->nodes[i];
|
||||||
|
|
||||||
|
if (strcmp(node->name, name) == 0) {
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
||||||
int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
|
int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
|
||||||
|
|
||||||
@ -14527,7 +14555,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|||||||
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
||||||
i,
|
i,
|
||||||
node->ne[0], node->ne[1], node->ne[2],
|
node->ne[0], node->ne[1], node->ne[2],
|
||||||
GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
||||||
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
||||||
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
||||||
(double) node->perf_time_us / 1000.0,
|
(double) node->perf_time_us / 1000.0,
|
||||||
@ -14541,7 +14569,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|||||||
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
||||||
i,
|
i,
|
||||||
node->ne[0], node->ne[1],
|
node->ne[0], node->ne[1],
|
||||||
GGML_OP_LABEL[node->op]);
|
GGML_OP_NAME[node->op]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
||||||
@ -14549,7 +14577,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_LABEL[i], (double) perf_total_per_op_us[i] / 1000.0);
|
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_PRINT("========================================\n");
|
GGML_PRINT("========================================\n");
|
||||||
|
12
ggml.h
12
ggml.h
@ -198,6 +198,7 @@
|
|||||||
#define GGML_MAX_PARAMS 256
|
#define GGML_MAX_PARAMS 256
|
||||||
#define GGML_MAX_CONTEXTS 64
|
#define GGML_MAX_CONTEXTS 64
|
||||||
#define GGML_MAX_OPT 4
|
#define GGML_MAX_OPT 4
|
||||||
|
#define GGML_MAX_NAME 32
|
||||||
#define GGML_DEFAULT_N_THREADS 4
|
#define GGML_DEFAULT_N_THREADS 4
|
||||||
|
|
||||||
#define GGML_ASSERT(x) \
|
#define GGML_ASSERT(x) \
|
||||||
@ -372,11 +373,16 @@ extern "C" {
|
|||||||
|
|
||||||
void * data;
|
void * data;
|
||||||
|
|
||||||
char name[32];
|
char name[GGML_MAX_NAME];
|
||||||
|
|
||||||
char padding[16];
|
char padding[16];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
||||||
|
|
||||||
|
// use this to compute the memory overhead of a tensor
|
||||||
|
static const size_t GGML_TENSOR_OVERHEAD = (GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16);
|
||||||
|
|
||||||
// computation graph
|
// computation graph
|
||||||
struct ggml_cgraph {
|
struct ggml_cgraph {
|
||||||
int n_nodes;
|
int n_nodes;
|
||||||
@ -429,6 +435,7 @@ extern "C" {
|
|||||||
GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
||||||
|
|
||||||
GGML_API const char * ggml_type_name(enum ggml_type type);
|
GGML_API const char * ggml_type_name(enum ggml_type type);
|
||||||
|
GGML_API const char * ggml_op_name (enum ggml_op op);
|
||||||
|
|
||||||
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
||||||
|
|
||||||
@ -445,6 +452,7 @@ extern "C" {
|
|||||||
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
|
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
|
||||||
|
|
||||||
GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
||||||
|
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_new_tensor(
|
GGML_API struct ggml_tensor * ggml_new_tensor(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
@ -970,6 +978,8 @@ extern "C" {
|
|||||||
GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
||||||
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name);
|
||||||
|
|
||||||
// print info and performance information for the graph
|
// print info and performance information for the graph
|
||||||
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user