gguf : add array-valued key/value pairs to the example writer and the reader

* examples/gguf/gguf.cpp: rename gguf_ex_write_param to gguf_ex_write_val, add
  gguf_ex_write_arr (generic template plus a std::string specialisation), write
  three example arrays, raise n_kv from 9 to 12 to match, and call gguf_free()
  at the end of gguf_ex_read_0.
* ggml.c: add the GGUF_TYPE_SIZE table, replace the array payload pointer with
  `void * data`, read GGUF_TYPE_ARRAY values in gguf_init_from_file, call
  fclose(file) before returning the context, and release array storage
  (including per-element strings) in gguf_free.
* ggml.h: add GGUF_TYPE_COUNT and declare gguf_get_arr_n / gguf_get_arr_data.

NOTE(review): this patch text was rebuilt from a whitespace-collapsed copy.
Line breaks were checked against the hunk headers' line counts. Indentation
follows the usual 4-space style, and padding used for column alignment inside
lines could not be recovered, so apply with `git apply --ignore-whitespace`.
Template parameter/argument lists (<typename T>, <std::string>, <uint16_t>, ...)
were restored from the parameter types and GGUF_TYPE_* tags - confirm against
the upstream commit.

diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
index d6a0691d0..c3494a343 100644
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
 }
 
 template<typename T>
-void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
+void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
     gguf_ex_write_str(fout, key);
     fout.write((const char *) &type, sizeof(type));
     fout.write((const char *) &val, sizeof(val));
@@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu
 }
 
 template<>
-void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
+void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
     gguf_ex_write_str(fout, key);
     fout.write((const char *) &type, sizeof(type));
 
     const int32_t n = val.size();
     fout.write((const char *) &n, sizeof(n));
     fout.write(val.c_str(), n);
+
+    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
+}
+
+template<typename T>
+void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
+    gguf_ex_write_str(fout, key);
+    {
+        const enum gguf_type tarr = GGUF_TYPE_ARRAY;
+        fout.write((const char *) &tarr, sizeof(tarr));
+    }
+
+    const int32_t n = val.size();
+    fout.write((const char *) &type, sizeof(type));
+    fout.write((const char *) &n, sizeof(n));
+    fout.write((const char *) val.data(), n * sizeof(T));
+
+    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
+    for (int i = 0; i < n; ++i) {
+        fprintf(stdout, "%s", to_string(val[i]).c_str());
+        if (i < n - 1) {
+            fprintf(stdout, ", ");
+        }
+    }
+    fprintf(stdout, "]\n");
+}
+
+template<>
+void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
+    gguf_ex_write_str(fout, key);
+    {
+        const enum gguf_type tarr = GGUF_TYPE_ARRAY;
+        fout.write((const char *) &tarr, sizeof(tarr));
+    }
+
+    const int32_t n = val.size();
+    fout.write((const char *) &type, sizeof(type));
+    fout.write((const char *) &n, sizeof(n));
+    for (int i = 0; i < n; ++i) {
+        const int32_t nstr = val[i].size();
+        fout.write((const char *) &nstr, sizeof(nstr));
+        fout.write(val[i].c_str(), nstr);
+    }
+
+    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
+    for (int i = 0; i < n; ++i) {
+        fprintf(stdout, "%s", val[i].c_str());
+        if (i < n - 1) {
+            fprintf(stdout, ", ");
+        }
+    }
+    fprintf(stdout, "]\n");
 }
 
 bool gguf_ex_write(const std::string & fname) {
@@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) {
         fout.write((const char *) &version, sizeof(version));
     }
 
+    // NOTE: these have to match the output below!
     const int n_tensors = 10;
-    const int n_kv = 9;
+    const int n_kv = 12;
 
     fout.write((const char*) &n_tensors, sizeof(n_tensors));
     fout.write((const char*) &n_kv, sizeof(n_kv));
@@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) {
 
     // kv data
     {
-        gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
-        gguf_ex_write_param< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
-        gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
-        gguf_ex_write_param< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
-        gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
-        gguf_ex_write_param< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
+        gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
+        gguf_ex_write_val< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
+        gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
+        gguf_ex_write_val< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
+        gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
+        gguf_ex_write_val< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
 
-        gguf_ex_write_param<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
-        gguf_ex_write_param<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
+        gguf_ex_write_val<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
+        gguf_ex_write_val<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
 
-        gguf_ex_write_param<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
+        gguf_ex_write_val<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
+
+        gguf_ex_write_arr<int16_t> (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16, { 1, 2, 3, 4, });
+        gguf_ex_write_arr<float> (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
+        gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" });
     }
 
     uint64_t offset_tensor = 0;
@@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) {
         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
 
         for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name(ctx, i);
+            const char * name = gguf_get_tensor_name (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
         }
     }
 
+    gguf_free(ctx);
+
     return true;
 }
 
@@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) {
         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
 
         for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name(ctx, i);
+            const char * name = gguf_get_tensor_name (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
diff --git a/ggml.c b/ggml.c
index ebdb6536f..96c7ebd34 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
 
-
 static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32] = "f32",
     [GGML_TYPE_F16] = "f16",
@@ -18302,7 +18301,19 @@ struct gguf_str {
     char * data;
 };
 
-union gguf_value;
+static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
+    [GGUF_TYPE_UINT8] = sizeof(uint8_t),
+    [GGUF_TYPE_INT8] = sizeof(int8_t),
+    [GGUF_TYPE_UINT16] = sizeof(uint16_t),
+    [GGUF_TYPE_INT16] = sizeof(int16_t),
+    [GGUF_TYPE_UINT32] = sizeof(uint32_t),
+    [GGUF_TYPE_INT32] = sizeof(int32_t),
+    [GGUF_TYPE_FLOAT32] = sizeof(float),
+    [GGUF_TYPE_BOOL] = sizeof(bool),
+    [GGUF_TYPE_STRING] = sizeof(struct gguf_str),
+    [GGUF_TYPE_ARRAY] = 0, // undefined
+};
+static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
 
 union gguf_value {
     uint8_t uint8;
@@ -18320,7 +18331,7 @@ union gguf_value {
         enum gguf_type type;
 
         uint32_t n;
-        union gguf_value * arr;
+        void * data;
     } arr;
 };
 
@@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
                 case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
                 case GGUF_TYPE_ARRAY:
-                    GGML_ASSERT("gguf: array type not implemented");
-                    break;
+                    {
+                        ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
+                        ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
+
+                        switch (kv->value.arr.type) {
+                            case GGUF_TYPE_UINT8:
+                            case GGUF_TYPE_INT8:
+                            case GGUF_TYPE_UINT16:
+                            case GGUF_TYPE_INT16:
+                            case GGUF_TYPE_UINT32:
+                            case GGUF_TYPE_INT32:
+                            case GGUF_TYPE_FLOAT32:
+                            case GGUF_TYPE_BOOL:
+                                {
+                                    kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
+                                    ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
+                                } break;
+                            case GGUF_TYPE_STRING:
+                                {
+                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
+                                    for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                                        ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
+                                    }
+                                } break;
+                            case GGUF_TYPE_ARRAY:
+                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
+                        };
+                    } break;
+                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
             };
 
             if (!ok) {
@@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ggml_set_no_alloc(ctx_data, params.no_alloc);
     }
 
+    fclose(file);
+
     return ctx;
 }
 
@@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) {
                     free(kv->value.str.data);
                 }
             }
+
+            if (kv->type == GGUF_TYPE_ARRAY) {
+                if (kv->value.arr.data) {
+                    if (kv->value.arr.type == GGUF_TYPE_STRING) {
+                        for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                            struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
+                            if (str->data) {
+                                free(str->data);
+                            }
+                        }
+                    }
+                    free(kv->value.arr.data);
+                }
+            }
         }
 
         GGML_ALIGNED_FREE(ctx->header.kv);
diff --git a/ggml.h b/ggml.h
index 91588895c..e857b3f14 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1631,6 +1631,7 @@ extern "C" {
         GGUF_TYPE_BOOL = 7,
         GGUF_TYPE_STRING = 8,
         GGUF_TYPE_ARRAY = 9,
+        GGUF_TYPE_COUNT, // marks the end of the enum
     };
 
     struct gguf_context;
@@ -1664,7 +1665,8 @@ extern "C" {
     GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
     GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
     GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
-    // TODO: arr
+    GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
+    GGML_API void gguf_get_arr_data(struct gguf_context * ctx, int i, void * data);
 
     GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
     GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);