add tool to allow plotting tensor allocation maps within buffers

Commit cad8abb49b (parent 5f4dcb1e60) in ggerganov/llama.cpp.
```diff
@@ -71,6 +71,9 @@ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_i
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
 
+// Export tensor allocations in a graph to a file that can be plotted
+GGML_API void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph);
+
 #ifdef __cplusplus
 }
 #endif
```
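With the declaration in place, calling the exporter only requires a graph whose tensors have already been placed in backend buffers. A minimal sketch of a driver follows; `build_graph` is a hypothetical placeholder for whatever constructs your `ggml_cgraph`, not part of this commit:

```cpp
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

struct ggml_cgraph * build_graph(struct ggml_context * ctx); // hypothetical helper

// Allocate a graph's tensors, then dump the resulting allocation map to CSV
// so it can be plotted.
void dump_alloc_map(struct ggml_context * ctx, ggml_backend_t backend) {
    struct ggml_cgraph * graph = build_graph(ctx);
    // assigns each tensor in ctx a buffer and a data pointer
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
    ggml_gallocr_export_allocs("allocs.csv", graph); // new API from this commit
    ggml_backend_buffer_free(buf);
}
```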
```diff
@@ -208,6 +208,9 @@ extern "C" {
     // Set a callback to be called for each resulting node during graph compute
     GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 
+    // internal
+    GGML_API struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched);
+
     //
     // Utils
     //
```
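The getter pairs with the exporter: once the scheduler has allocated a graph, its internal copy can be dumped directly. A hedged two-liner, assuming `sched` has already run an allocation:

```cpp
// dump the allocation map of the graph the scheduler most recently allocated
ggml_gallocr_export_allocs("allocs.csv", ggml_backend_sched_get_graph_copy(sched));
```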
```diff
@@ -1034,3 +1034,30 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
+
+
+static void export_tensor(FILE * f, struct ggml_tensor * t) {
+    size_t offset = (uintptr_t)t->data - (uintptr_t)ggml_backend_buffer_get_base(t->buffer);
+    // [tensor_id] [tensor_view_src_id] [tensor_view_offs] [tensor_name] [buffer_id] [buffer_name] [offset] [size]
+    fprintf(f, "%p,%p,%zu,\"%s\",%p,\"%s\",%zu,%zu\n",
+        (void *)t, (void *)t->view_src, t->view_offs, t->name,
+        (void *)t->buffer, ggml_backend_buffer_name(t->buffer),
+        offset, ggml_backend_buft_get_alloc_size(t->buffer->buft, t));
+}
+
+
+void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph) {
+    FILE * f = fopen(filename, "wb");
+
+    fprintf(f, "tensor_id,tensor_view_src_id,tensor_view_offs,tensor_name,buffer_id,buffer_name,offset,size\n");
+
+    for (int i = 0; i < graph->n_leafs; i++) {
+        export_tensor(f, graph->leafs[i]);
+    }
+
+    for (int i = 0; i < graph->n_nodes; i++) {
+        export_tensor(f, graph->nodes[i]);
+    }
+
+    fclose(f);
+}
```
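The exporter writes one row per leaf and per node, keyed by raw pointers so that a view can be linked back to its `view_src`. The commit's plot-allocs.ipynb (suppressed below) is what actually consumes these files; purely as an illustration of what the columns support, here is a small standalone reader that computes a per-buffer high-water mark (max of offset + size). It is a sketch, assuming tensor and buffer names contain no commas; it is not the notebook's code:

```cpp
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Split one CSV line on commas. Good enough here because the only quoted
// fields (tensor_name, buffer_name) do not normally contain commas.
static std::vector<std::string> split(const std::string & line) {
    std::vector<std::string> out;
    std::stringstream ss(line);
    std::string field;
    while (std::getline(ss, field, ',')) out.push_back(field);
    return out;
}

int main(int argc, char ** argv) {
    std::ifstream f(argc > 1 ? argv[1] : "allocs0.csv");
    std::string line;
    std::getline(f, line); // skip the header row
    std::map<std::string, size_t> high_water; // buffer_name -> max(offset + size)
    while (std::getline(f, line)) {
        auto cols = split(line);
        if (cols.size() < 8) continue;
        size_t offset = std::stoull(cols[6]);
        size_t size   = std::stoull(cols[7]);
        auto & hw = high_water[cols[5]]; // names keep their quotes; harmless as keys
        hw = std::max(hw, offset + size);
    }
    for (const auto & kv : high_water) {
        std::cout << kv.first << ": " << kv.second << " bytes used\n";
    }
}
```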
```diff
@@ -2028,6 +2028,10 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
     return sched->backends[backend_index];
 }
 
+GGML_API struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched) {
+    return &sched->graph;
+}
+
 // utils
 
 void ggml_backend_view_init(struct ggml_tensor * tensor) {
```
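Note that the getter returns a pointer into the scheduler's own state (`&sched->graph`), not a caller-owned copy; presumably this is why the header marks it "internal", since that graph is rewritten the next time the scheduler resets or allocates.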
plot-allocs.ipynb (new file, 292 lines): file diff suppressed because one or more lines are too long.
```diff
@@ -14642,6 +14642,13 @@ static int llama_decode_internal(
 
     ggml_backend_sched_alloc_graph(lctx.sched, gf);
 
+#if 1
+    static int id = 0;
+    printf("saving allocs %d (%d tokens)\n", id, n_tokens);
+    ggml_gallocr_export_allocs(format("allocs%d.csv", id).c_str(), ggml_backend_sched_get_graph_copy(lctx.sched));
+    id++;
+#endif
+
     llama_set_inputs(lctx, u_batch);
 
     llama_graph_compute(lctx, gf, n_threads);
```
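The `#if 1` block is plainly debug instrumentation: every decode call prints a line and writes a fresh `allocs<id>.csv` into the working directory, and turning it off means editing the source to `#if 0`. If one wanted to keep the hook without recompiling, a hypothetical variant could gate it on an environment variable; `GGML_EXPORT_ALLOCS` below is an invented name, not part of this commit, while `format` is the helper llama.cpp already uses in the diff above:

```cpp
// Hypothetical alternative to the compile-time "#if 1" switch: only export
// when the user opts in via an (invented) environment variable.
if (getenv("GGML_EXPORT_ALLOCS") != nullptr) {
    static int id = 0;
    printf("saving allocs %d (%d tokens)\n", id, n_tokens);
    ggml_gallocr_export_allocs(format("allocs%d.csv", id).c_str(),
                               ggml_backend_sched_get_graph_copy(lctx.sched));
    id++;
}
```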