gguf : add comments

2025-01-06 02:48:57 +01:00 · 2023-07-26 22:56:26 +03:00 · 2023-07-26 22:56:26 +03:00 · d8491fc7e3
commit d8491fc7e3
parent 5628ec7163
1 changed files with 125 additions and 109 deletions
--- a/ggml.c
+++ b/ggml.c
@ -18407,106 +18407,120 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
    // offset from start of file
    size_t offset = 0;
    // check the magic before making allocations
    uint32_t magic = 0;
-    gguf_fread_el(&magic, sizeof(magic), file, &offset);
+
-    if (magic != GGUF_MAGIC) {
+    // check the magic before making allocations
-        fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
+    {
-        fclose(file);
+        gguf_fread_el(&magic, sizeof(magic), file, &offset);
-        return NULL;
+
        if (magic != GGUF_MAGIC) {
            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
            fclose(file);
            return NULL;
        }
    }
    bool ok = true;
    struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
-    ctx->header.magic = magic;
+    // read the header
-    ctx->header.kv    = NULL;
+    {
        ctx->header.magic = magic;
        ctx->header.kv    = NULL;
-    ctx->infos = NULL;
+        ctx->infos = NULL;
-    ctx->data  = NULL;
+        ctx->data  = NULL;
-    ok = ok && gguf_fread_el(&ctx->header.version,   sizeof(ctx->header.version),   file, &offset);
+        ok = ok && gguf_fread_el(&ctx->header.version,   sizeof(ctx->header.version),   file, &offset);
-    ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
+        ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
-    ok = ok && gguf_fread_el(&ctx->header.n_kv,      sizeof(ctx->header.n_kv),      file, &offset);
+        ok = ok && gguf_fread_el(&ctx->header.n_kv,      sizeof(ctx->header.n_kv),      file, &offset);
    if (!ok) {
        fprintf(stderr, "%s: failed to read header\n", __func__);
        fclose(file);
        gguf_free(ctx);
        return NULL;
    }
    ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
        struct gguf_kv * kv = &ctx->header.kv[i];
        //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
        ok = ok && gguf_fread_str(&kv->key,                          file, &offset);
      //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
        ok = ok && gguf_fread_el (&kv->type,    sizeof(kv->type),    file, &offset);
        //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
        switch (kv->type) {
            case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (&kv->value.uint8,   sizeof(kv->value.uint8),   file, &offset); break;
            case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (&kv->value.int8,    sizeof(kv->value.int8),    file, &offset); break;
            case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (&kv->value.uint16,  sizeof(kv->value.uint16),  file, &offset); break;
            case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (&kv->value.int16,   sizeof(kv->value.int16),   file, &offset); break;
            case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (&kv->value.uint32,  sizeof(kv->value.uint32),  file, &offset); break;
            case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (&kv->value.int32,   sizeof(kv->value.int32),   file, &offset); break;
            case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
            case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
            case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
            case GGUF_TYPE_ARRAY:
                                    GGML_ASSERT("gguf: array type not implemented");
                                    break;
        };
        if (!ok) {
-            break;
+            fprintf(stderr, "%s: failed to read header\n", __func__);
        }
    }
    if (!ok) {
        fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
        fclose(file);
        gguf_free(ctx);
        return NULL;
    }
    ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
        struct gguf_tensor_info * info = &ctx->infos[i];
        for (int j = 0; j < GGML_MAX_DIMS; ++j) {
            info->ne[j] = 1;
        }
        ok = ok && gguf_fread_str(&info->name,                          file, &offset);
        ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims),  file, &offset);
        for (uint32_t j = 0; j < info->n_dims; ++j) {
            ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
        }
      //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms),  file, &offset);
        ok = ok && gguf_fread_el (&info->type,    sizeof(info->type),   file, &offset);
        ok = ok && gguf_fread_el (&info->offset,  sizeof(info->offset), file, &offset);
        if (!ok) {
            fprintf(stderr, "%s: failed to read tensor info\n", __func__);
            fclose(file);
            gguf_free(ctx);
            return NULL;
        }
    }
    // read the kv pairs
    {
        ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
            struct gguf_kv * kv = &ctx->header.kv[i];
            //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
            ok = ok && gguf_fread_str(&kv->key,                          file, &offset);
          //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
            ok = ok && gguf_fread_el (&kv->type,    sizeof(kv->type),    file, &offset);
            //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
            switch (kv->type) {
                case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (&kv->value.uint8,   sizeof(kv->value.uint8),   file, &offset); break;
                case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (&kv->value.int8,    sizeof(kv->value.int8),    file, &offset); break;
                case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (&kv->value.uint16,  sizeof(kv->value.uint16),  file, &offset); break;
                case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (&kv->value.int16,   sizeof(kv->value.int16),   file, &offset); break;
                case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (&kv->value.uint32,  sizeof(kv->value.uint32),  file, &offset); break;
                case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (&kv->value.int32,   sizeof(kv->value.int32),   file, &offset); break;
                case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
                case GGUF_TYPE_ARRAY:
                                        GGML_ASSERT("gguf: array type not implemented");
                                        break;
            };
            if (!ok) {
                break;
            }
        }
        if (!ok) {
            fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
            fclose(file);
            gguf_free(ctx);
            return NULL;
        }
    }
    // read the tensor infos
    {
        ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];
            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                info->ne[j] = 1;
            }
            ok = ok && gguf_fread_str(&info->name,                          file, &offset);
            ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims),  file, &offset);
            for (uint32_t j = 0; j < info->n_dims; ++j) {
                ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
            }
          //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms),  file, &offset);
            ok = ok && gguf_fread_el (&info->type,   sizeof(info->type),    file, &offset);
            ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset),  file, &offset);
            if (!ok) {
                fprintf(stderr, "%s: failed to read tensor info\n", __func__);
                fclose(file);
                gguf_free(ctx);
                return NULL;
            }
        }
    }
    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
    // TODO: determine new alignment from kv if available
    // we require the data section to be aligned, so take into account any padding
    {
        const size_t offset_pad = offset % ctx->alignment;
@ -18516,38 +18530,46 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        }
    }
    // store the current file offset - this is where the data section starts
    ctx->offset = offset;
-    ctx->size_data = 0;
+    // compute the total size of the data section, taking into account the alignment
    {
-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        ctx->size_data = 0;
-        struct gguf_tensor_info * info = &ctx->infos[i];
+        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];
-        const int64_t ne =
+            const int64_t ne =
-            (int64_t) info->ne[0] *
+                (int64_t) info->ne[0] *
-            (int64_t) info->ne[1] *
+                (int64_t) info->ne[1] *
-            (int64_t) info->ne[2] *
+                (int64_t) info->ne[2] *
-            (int64_t) info->ne[3];
+                (int64_t) info->ne[3];
-        if (ne % ggml_blck_size(info->type) != 0) {
+            if (ne % ggml_blck_size(info->type) != 0) {
-            fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
+                fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
-                    __func__, info->name.data, ne, ggml_blck_size(info->type));
+                        __func__, info->name.data, ne, ggml_blck_size(info->type));
-            fclose(file);
+                fclose(file);
-            gguf_free(ctx);
+                gguf_free(ctx);
-            return NULL;
+                return NULL;
            }
            const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
            ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
        }
        const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
        ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
    }
-    // load the tensor data
+    // load the tensor data only if requested
    // TODO: simplify
    if (params.ctx != NULL) {
        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
        // the ggml_tensor structs to the appropriate locations in the binary blob
        // compute the exact size needed for the new ggml_context
        const size_t mem_size =
            params.no_alloc ?
-            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() :
+            (ctx->header.n_tensors    )*ggml_tensor_overhead() :
            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
        struct ggml_init_params pdata = {
@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
            ok = ok && data != NULL;
-            // read the tensor data
+            // read the binary blob with the tensor data
            ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
            if (!ok) {
@ -18602,6 +18624,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                break;
            }
            // point the data member to the appropriate location in the binary blob using the tensor infos
            if (params.no_alloc == false) {
              //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
                cur->data = (char *) data->data + ctx->infos[i].offset;               // offset from data
@ -18609,7 +18632,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        }
        if (!ok) {
-            fprintf(stderr, "%s: failed to create tensors\n", __func__);
+            fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
            fclose(file);
            ggml_free(ctx_data);
            gguf_free(ctx);
@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        ggml_set_no_alloc(ctx_data, params.no_alloc);
    }
    if (!ok) {
        fprintf(stderr, "%s: failed to read tensor data\n", __func__);
        fclose(file);
        gguf_free(ctx);
        return NULL;
    }
    return ctx;
 }