gguf : add comments

2025-01-06 02:48:57 +01:00 · 2023-07-26 22:56:26 +03:00 · 2023-07-26 22:56:26 +03:00 · d8491fc7e3
commit d8491fc7e3
parent 5628ec7163
1 changed files with 125 additions and 109 deletions
--- a/ggml.c
+++ b/ggml.c
@ -18407,19 +18407,25 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
    // offset from start of file
    size_t offset = 0;

-    // check the magic before making allocations
    uint32_t magic = 0;
+
+    // check the magic before making allocations
+    {
        gguf_fread_el(&magic, sizeof(magic), file, &offset);
+
        if (magic != GGUF_MAGIC) {
            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
            fclose(file);
            return NULL;
        }
+    }

    bool ok = true;

    struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));

+    // read the header
+    {
        ctx->header.magic = magic;
        ctx->header.kv    = NULL;

@ -18436,7 +18442,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
            gguf_free(ctx);
            return NULL;
        }
+    }

+    // read the kv pairs
+    {
        ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));

        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
@ -18476,7 +18485,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
            gguf_free(ctx);
            return NULL;
        }
+    }

+    // read the tensor infos
+    {
        ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));

        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
@ -18502,11 +18514,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                return NULL;
            }
        }
+    }

    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;

    // TODO: determine new alignment from kv if available

+    // we require the data section to be aligned, so take into account any padding
    {
        const size_t offset_pad = offset % ctx->alignment;

@ -18516,10 +18530,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        }
    }

+    // store the current file offset - this is where the data section starts
    ctx->offset = offset;

-    ctx->size_data = 0;
+    // compute the total size of the data section, taking into account the alignment
+    {

+        ctx->size_data = 0;
        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];

@ -18541,13 +18558,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

            ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
        }
+    }

-    // load the tensor data
-    // TODO: simplify
+    // load the tensor data only if requested
    if (params.ctx != NULL) {
+        // if params.no_alloc is set, then we create "empty" tensors and do not read the binary blob
+        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
+        // the ggml_tensor structs to the appropriate locations in the binary blob
+
+        // compute the exact size needed for the new ggml_context
        const size_t mem_size =
            params.no_alloc ?
-            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() :
+            (ctx->header.n_tensors    )*ggml_tensor_overhead() :
            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;

        struct ggml_init_params pdata = {
@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

            ok = ok && data != NULL;

-            // read the tensor data
+            // read the binary blob with the tensor data
            ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);

            if (!ok) {
@ -18602,6 +18624,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                break;
            }

+            // point the data member to the appropriate location in the binary blob using the tensor infos
            if (params.no_alloc == false) {
              //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
                cur->data = (char *) data->data + ctx->infos[i].offset;               // offset from data
@ -18609,7 +18632,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        }

        if (!ok) {
-            fprintf(stderr, "%s: failed to create tensors\n", __func__);
+            fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
            fclose(file);
            ggml_free(ctx_data);
            gguf_free(ctx);
@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        ggml_set_no_alloc(ctx_data, params.no_alloc);
    }

-    if (!ok) {
-        fprintf(stderr, "%s: failed to read tensor data\n", __func__);
-        fclose(file);
-        gguf_free(ctx);
-        return NULL;
-    }
-
    return ctx;
 }