gguf : add array support

2025-01-03 17:51:09 +01:00 · 2023-07-27 14:53:07 +03:00 · 2023-07-27 14:53:07 +03:00 · d2b6ca13ad
commit d2b6ca13ad
parent d89533dff6
3 changed files with 135 additions and 20 deletions
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
 }
 template<typename T>
-void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
+void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
    gguf_ex_write_str(fout, key);
    fout.write((const char *) &type, sizeof(type));
    fout.write((const char *) &val,  sizeof(val));
@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu
 }
 // std::string specialization of the scalar key/value writer. The `-`/`+` pair
 // below shows its rename from gguf_ex_write_param to gguf_ex_write_val.
 //
 // Written to `fout`, in order: the key (via gguf_ex_write_str), the raw `type`
 // tag, the string length as an int32_t, and then exactly that many bytes of
 // `val` (no terminating NUL). The key and value are also logged to stdout.
 template<>
-void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
+void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
    gguf_ex_write_str(fout, key);
    fout.write((const char *) &type, sizeof(type));
    // length prefix: val.size() is narrowed to a 32-bit signed integer here
    const int32_t n = val.size();
    fout.write((const char *) &n, sizeof(n));
    fout.write(val.c_str(), n);
    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
 }
 // Write a key/value pair whose value is an array of fixed-size elements.
 //
 // Emitted to `fout`, in order: the key (via gguf_ex_write_str), the
 // GGUF_TYPE_ARRAY tag, the element type `type`, the element count as an
 // int32_t, and the raw bytes of `val` (count * sizeof(T)).
 // The array contents are also echoed to stdout as "[a, b, c]".
 template<typename T>
 void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
    gguf_ex_write_str(fout, key);

    // the array marker comes first, the element type second
    const enum gguf_type arr_tag = GGUF_TYPE_ARRAY;
    fout.write((const char *) &arr_tag, sizeof(arr_tag));
    fout.write((const char *) &type,    sizeof(type));

    // element count, followed by the elements as one contiguous blob
    const int32_t count = val.size();
    fout.write((const char *) &count, sizeof(count));
    fout.write((const char *) val.data(), count * sizeof(T));

    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
    for (int32_t idx = 0; idx < count; ++idx) {
        // separator goes in front of every element except the first
        if (idx > 0) {
            fprintf(stdout, ", ");
        }
        fprintf(stdout, "%s", to_string(val[idx]).c_str());
    }
    fprintf(stdout, "]\n");
 }
 // std::string specialization of the array writer.
 //
 // Emitted to `fout`, in order: the key (via gguf_ex_write_str), the
 // GGUF_TYPE_ARRAY tag, the element type `type`, the element count as an
 // int32_t, and then each string as an int32_t length followed by that many
 // bytes (no terminating NUL).
 // The array contents are also echoed to stdout as "[a, b, c]".
 template<>
 void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
    gguf_ex_write_str(fout, key);

    // the array marker comes first, the element type second
    const enum gguf_type arr_tag = GGUF_TYPE_ARRAY;
    fout.write((const char *) &arr_tag, sizeof(arr_tag));
    fout.write((const char *) &type,    sizeof(type));

    const int32_t count = val.size();
    fout.write((const char *) &count, sizeof(count));

    // strings are variable-length, so each one carries its own length prefix
    for (int32_t idx = 0; idx < count; ++idx) {
        const std::string & item = val[idx];
        const int32_t len = item.size();
        fout.write((const char *) &len, sizeof(len));
        fout.write(item.c_str(), len);
    }

    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
    for (int32_t idx = 0; idx < count; ++idx) {
        // separator goes in front of every element except the first
        if (idx > 0) {
            fprintf(stdout, ", ");
        }
        fprintf(stdout, "%s", val[idx].c_str());
    }
    fprintf(stdout, "]\n");
 }
 bool gguf_ex_write(const std::string & fname) {
@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) {
        fout.write((const char *) &version, sizeof(version));
    }
    // NOTE: these have to match the output below!
    const int n_tensors = 10;
-    const int n_kv = 9;
+    const int n_kv      = 12;
    fout.write((const char*) &n_tensors, sizeof(n_tensors));
    fout.write((const char*) &n_kv, sizeof(n_kv));
@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) {
    // kv data
    {
-        gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,   0x12);
+        gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,   0x12);
-        gguf_ex_write_param<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,   -0x13);
+        gguf_ex_write_val<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,   -0x13);
-        gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,  0x1234);
+        gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,  0x1234);
-        gguf_ex_write_param< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,  -0x1235);
+        gguf_ex_write_val< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,  -0x1235);
-        gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,  0x12345678);
+        gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,  0x12345678);
-        gguf_ex_write_param< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,  -0x12345679);
+        gguf_ex_write_val< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,  -0x12345679);
-        gguf_ex_write_param<float>   (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
+        gguf_ex_write_val<float>   (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
-        gguf_ex_write_param<bool>    (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true);
+        gguf_ex_write_val<bool>    (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true);
-        gguf_ex_write_param<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world");
+        gguf_ex_write_val<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world");
        gguf_ex_write_arr<int16_t>    (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16,   { 1, 2, 3, 4, });
        gguf_ex_write_arr<float>      (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
        gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING,  { "hello", "world", "!" });
    }
    uint64_t offset_tensor = 0;
@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) {
        fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
        for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name(ctx, i);
+            const char * name   = gguf_get_tensor_name  (ctx, i);
            const size_t offset = gguf_get_tensor_offset(ctx, i);
            fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
        }
    }
    gguf_free(ctx);
    return true;
 }
@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) {
        fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
        for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name(ctx, i);
+            const char * name   = gguf_get_tensor_name  (ctx, i);
            const size_t offset = gguf_get_tensor_offset(ctx, i);
            fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
--- a/ggml.c
+++ b/ggml.c
@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
 static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
    [GGML_TYPE_F32]  = "f32",
    [GGML_TYPE_F16]  = "f16",
@ -18302,7 +18301,19 @@ struct gguf_str {
    char * data;
 };
-union gguf_value;
+static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
    // Per-type size in bytes of a single value, indexed by enum gguf_type.
    // gguf_init_from_file multiplies these by the element count to size and
    // read the payload of arrays with fixed-size element types.
    [GGUF_TYPE_UINT8]   = sizeof(uint8_t),
    [GGUF_TYPE_INT8]    = sizeof(int8_t),
    [GGUF_TYPE_UINT16]  = sizeof(uint16_t),
    [GGUF_TYPE_INT16]   = sizeof(int16_t),
    [GGUF_TYPE_UINT32]  = sizeof(uint32_t),
    [GGUF_TYPE_INT32]   = sizeof(int32_t),
    [GGUF_TYPE_FLOAT32] = sizeof(float),
    [GGUF_TYPE_BOOL]    = sizeof(bool),
    // size of the in-memory gguf_str struct, not of the string bytes
    [GGUF_TYPE_STRING]  = sizeof(struct gguf_str),
    [GGUF_TYPE_ARRAY]   = 0, // undefined
 };
 // Compile-time reminder to revisit the table whenever the number of gguf types changes.
 static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
 union gguf_value {
    uint8_t  uint8;
@ -18320,7 +18331,7 @@ union gguf_value {
        enum gguf_type type;
        uint32_t n;
-        union gguf_value * arr;
+        void * data;
    } arr;
 };
@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
                case GGUF_TYPE_ARRAY:
-                                        GGML_ASSERT("gguf: array type not implemented");
+                    {
-                                        break;
+                        ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
                        ok = ok && gguf_fread_el(&kv->value.arr.n,    sizeof(kv->value.arr.n),    file, &offset);
                        switch (kv->value.arr.type) {
                            case GGUF_TYPE_UINT8:
                            case GGUF_TYPE_INT8:
                            case GGUF_TYPE_UINT16:
                            case GGUF_TYPE_INT16:
                            case GGUF_TYPE_UINT32:
                            case GGUF_TYPE_INT32:
                            case GGUF_TYPE_FLOAT32:
                            case GGUF_TYPE_BOOL:
                                {
                                    kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
                                    ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
                                } break;
                            case GGUF_TYPE_STRING:
                                {
                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
                                    for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
                                        ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
                                    }
                                } break;
                            case GGUF_TYPE_ARRAY:
                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
                        };
                    } break;
                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
            };
            if (!ok) {
@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        ggml_set_no_alloc(ctx_data, params.no_alloc);
    }
    fclose(file);
    return ctx;
 }
@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) {
                    free(kv->value.str.data);
                }
            }
            if (kv->type == GGUF_TYPE_ARRAY) {
                if (kv->value.arr.data) {
                    if (kv->value.arr.type == GGUF_TYPE_STRING) {
                        for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
                            struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
                            if (str->data) {
                                free(str->data);
                            }
                        }
                    }
                    free(kv->value.arr.data);
                }
            }
        }
        GGML_ALIGNED_FREE(ctx->header.kv);
--- a/ggml.h
+++ b/ggml.h
@ -1631,6 +1631,7 @@ extern "C" {
        GGUF_TYPE_BOOL    = 7,
        GGUF_TYPE_STRING  = 8,
        GGUF_TYPE_ARRAY   = 9,
        GGUF_TYPE_COUNT,       // marks the end of the enum
    };
    struct gguf_context;
@ -1664,7 +1665,8 @@ extern "C" {
    GGML_API float        gguf_get_val_f32 (struct gguf_context * ctx, int i);
    GGML_API bool         gguf_get_val_bool(struct gguf_context * ctx, int i);
    GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
-    // TODO: arr
+    GGML_API int          gguf_get_arr_n   (struct gguf_context * ctx, int i);
    GGML_API void         gguf_get_arr_data(struct gguf_context * ctx, int i, void * data);
    GGML_API int    gguf_get_n_tensors    (struct gguf_context * ctx);
    GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);