gguf : add comments

This commit is contained in:
Georgi Gerganov 2023-07-26 22:56:26 +03:00
parent 5628ec7163
commit d8491fc7e3
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

44
ggml.c
View File

@ -18407,19 +18407,25 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// offset from start of file // offset from start of file
size_t offset = 0; size_t offset = 0;
// check the magic before making allocations
uint32_t magic = 0; uint32_t magic = 0;
// check the magic before making allocations
{
gguf_fread_el(&magic, sizeof(magic), file, &offset); gguf_fread_el(&magic, sizeof(magic), file, &offset);
if (magic != GGUF_MAGIC) { if (magic != GGUF_MAGIC) {
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic); fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
fclose(file); fclose(file);
return NULL; return NULL;
} }
}
bool ok = true; bool ok = true;
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context)); struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
// read the header
{
ctx->header.magic = magic; ctx->header.magic = magic;
ctx->header.kv = NULL; ctx->header.kv = NULL;
@ -18436,7 +18442,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
gguf_free(ctx); gguf_free(ctx);
return NULL; return NULL;
} }
}
// read the kv pairs
{
ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv)); ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
@ -18476,7 +18485,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
gguf_free(ctx); gguf_free(ctx);
return NULL; return NULL;
} }
}
// read the tensor infos
{
ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info)); ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
@ -18502,11 +18514,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
return NULL; return NULL;
} }
} }
}
ctx->alignment = GGUF_DEFAULT_ALIGNMENT; ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
// TODO: determine new alignment from kv if available // TODO: determine new alignment from kv if available
// we require the data section to be aligned, so take into account any padding
{ {
const size_t offset_pad = offset % ctx->alignment; const size_t offset_pad = offset % ctx->alignment;
@ -18516,10 +18530,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
} }
} }
// store the current file offset - this is where the data section starts
ctx->offset = offset; ctx->offset = offset;
ctx->size_data = 0; // compute the total size of the data section, taking into account the alignment
{
ctx->size_data = 0;
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
struct gguf_tensor_info * info = &ctx->infos[i]; struct gguf_tensor_info * info = &ctx->infos[i];
@ -18541,13 +18558,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ctx->size_data += GGML_PAD(size_cur, ctx->alignment); ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
} }
}
// load the tensor data // load the tensor data only if requested
// TODO: simplify
if (params.ctx != NULL) { if (params.ctx != NULL) {
// if params.no_alloc is set, then we create "empty" tensors and do not read the binary blob
// otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
// the ggml_tensor structs to the appropriate locations in the binary blob
// compute the exact size needed for the new ggml_context
const size_t mem_size = const size_t mem_size =
params.no_alloc ? params.no_alloc ?
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() : (ctx->header.n_tensors )*ggml_tensor_overhead() :
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data; (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
struct ggml_init_params pdata = { struct ggml_init_params pdata = {
@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && data != NULL; ok = ok && data != NULL;
// read the tensor data // read the binary blob with the tensor data
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset); ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
if (!ok) { if (!ok) {
@ -18602,6 +18624,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
break; break;
} }
// point the data member to the appropriate location in the binary blob using the tensor infos
if (params.no_alloc == false) { if (params.no_alloc == false) {
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
@ -18609,7 +18632,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
} }
if (!ok) { if (!ok) {
fprintf(stderr, "%s: failed to create tensors\n", __func__); fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
fclose(file); fclose(file);
ggml_free(ctx_data); ggml_free(ctx_data);
gguf_free(ctx); gguf_free(ctx);
@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ggml_set_no_alloc(ctx_data, params.no_alloc); ggml_set_no_alloc(ctx_data, params.no_alloc);
} }
if (!ok) {
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
return ctx; return ctx;
} }