From 5628ec71636e0b390213caa4c273d3ef8bbd7459 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 26 Jul 2023 20:04:22 +0300 Subject: [PATCH] gguf : read / write sample models --- examples/gguf/gguf.cpp | 323 ++++++++++++++++++++++++++++++++++++++++- ggml.c | 119 ++++++++------- ggml.h | 5 +- 3 files changed, 386 insertions(+), 61 deletions(-) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index 602de519a..a5c442ac5 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -1,15 +1,326 @@ #include "ggml.h" #include +#include #include +#include +#include +#include -bool gguf_write(const std::string & fname) { +enum gguf_type { + GGUF_TYPE_UINT8 = 0, + GGUF_TYPE_INT8 = 1, + GGUF_TYPE_UINT16 = 2, + GGUF_TYPE_INT16 = 3, + GGUF_TYPE_UINT32 = 4, + GGUF_TYPE_INT32 = 5, + GGUF_TYPE_FLOAT32 = 6, + GGUF_TYPE_BOOL = 7, + GGUF_TYPE_STRING = 8, + GGUF_TYPE_ARRAY = 9, +}; +template +static std::string to_string(const T & val) { + std::stringstream ss; + ss << val; + return ss.str(); +} + +void gguf_ex_write_str(std::ofstream & fout, const std::string & val) { + const int32_t n = val.size(); + fout.write((const char *) &n, sizeof(n)); + fout.write(val.c_str(), n); +} + +void gguf_ex_write_i32(std::ofstream & fout, int32_t val) { + fout.write((const char *) &val, sizeof(val)); +} + +void gguf_ex_write_u64(std::ofstream & fout, size_t val) { + fout.write((const char *) &val, sizeof(val)); +} + +template +void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) { + gguf_ex_write_str(fout, key); + fout.write((const char *) &type, sizeof(type)); + fout.write((const char *) &val, sizeof(val)); + + fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), to_string(val).c_str()); +} + +template<> +void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) { + gguf_ex_write_str(fout, key); + fout.write((const char *) &type, sizeof(type)); + + const int32_t n = val.size(); + fout.write((const char *) &n, sizeof(n)); + fout.write(val.c_str(), n); +} + +bool gguf_ex_write(const std::string & fname) { + std::ofstream fout(fname.c_str(), std::ios::binary); + + { + const int32_t magic = GGUF_MAGIC; + fout.write((const char *) &magic, sizeof(magic)); + } + + { + const int32_t version = GGUF_VERSION; + fout.write((const char *) &version, sizeof(version)); + } + + const int n_tensors = 10; + const int n_kv = 9; + + fout.write((const char*) &n_tensors, sizeof(n_tensors)); + fout.write((const char*) &n_kv, sizeof(n_kv)); + + fprintf(stdout, "%s: write header\n", __func__); + + // kv data + { + gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12); + gguf_ex_write_param< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13); + gguf_ex_write_param(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234); + gguf_ex_write_param< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235); + gguf_ex_write_param(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678); + gguf_ex_write_param< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679); + + gguf_ex_write_param (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f); + gguf_ex_write_param (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true); + + gguf_ex_write_param(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world"); + } + + uint64_t offset_tensor = 0; + + struct ggml_init_params params = { + /*.mem_size =*/ 128ull*1024ull*1024ull, + /*.mem_buffer =*/ NULL, + /*.no_alloc =*/ false, + }; + + struct ggml_context * ctx_data = ggml_init(params); + + // tensor infos + for (int i = 0; i < n_tensors; ++i) { + const std::string name = "tensor_" + to_string(i); + + int64_t ne[GGML_MAX_DIMS] = { 1 }; + int32_t n_dims = rand() % GGML_MAX_DIMS + 1; + + for (int j = 0; j < n_dims; ++j) { + ne[j] = rand() % 10 + 1; + } + + struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne); + ggml_set_name(cur, name.c_str()); + + { + float * data = (float *) cur->data; + for (int j = 0; j < ggml_nelements(cur); ++j) { + data[j] = 100 + i; + } + } + + fprintf(stdout, "%s: tensor: %s, %d dims, ne = [", __func__, name.c_str(), n_dims); + for (int j = 0; j < 4; ++j) { + fprintf(stdout, "%s%3d", j == 0 ? "" : ", ", (int) cur->ne[j]); + } + fprintf(stdout, "], offset_tensor = %6" PRIu64 "\n", offset_tensor); + + gguf_ex_write_str(fout, name); + gguf_ex_write_i32(fout, n_dims); + for (int j = 0; j < n_dims; ++j) { + gguf_ex_write_i32(fout, cur->ne[j]); + } + gguf_ex_write_i32(fout, cur->type); + gguf_ex_write_u64(fout, offset_tensor); + + offset_tensor += GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT); + } + + const uint64_t offset_data = GGML_PAD((uint64_t) fout.tellp(), GGUF_DEFAULT_ALIGNMENT); + + fprintf(stdout, "%s: data offset = %" PRIu64 "\n", __func__, offset_data); + + { + const size_t pad = offset_data - fout.tellp(); + + for (size_t j = 0; j < pad; ++j) { + fout.put(0); + } + } + + for (int i = 0; i < n_tensors; ++i) { + fprintf(stdout, "%s: writing tensor %d data\n", __func__, i); + + const std::string name = "tensor_" + to_string(i); + + struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str()); + + fout.write((const char *) cur->data, ggml_nbytes(cur)); + + { + const size_t pad = GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT) - ggml_nbytes(cur); + + for (size_t j = 0; j < pad; ++j) { + fout.put(0); + } + } + } + + fout.close(); + + fprintf(stdout, "%s: wrote file '%s;\n", __func__, fname.c_str()); + + ggml_free(ctx_data); return true; } -bool gguf_read(const std::string & fname) { +// just read tensor info +bool gguf_ex_read_0(const std::string & fname) { + struct gguf_init_params params = { + /*.no_alloc = */ false, + /*.ctx = */ NULL, + }; + + struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); + + fprintf(stdout, "%s: version: %d\n", __func__, gguf_get_version(ctx)); + fprintf(stdout, "%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx)); + fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx)); + + // kv + { + const int n_kv = gguf_get_n_kv(ctx); + + fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv); + + for (int i = 0; i < n_kv; ++i) { + const char * key = gguf_get_key(ctx, i); + + fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key); + } + } + + // tensor info + { + const int n_tensors = gguf_get_n_tensors(ctx); + + fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); + + for (int i = 0; i < n_tensors; ++i) { + const char * name = gguf_get_tensor_name(ctx, i); + const size_t offset = gguf_get_tensor_offset(ctx, i); + + fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); + } + } + + return true; +} + +// read and create ggml_context containing the tensors and their data +bool gguf_ex_read_1(const std::string & fname) { + struct ggml_context * ctx_data = NULL; + + struct gguf_init_params params = { + /*.no_alloc = */ false, + /*.ctx = */ &ctx_data, + }; + + struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); + + fprintf(stdout, "%s: version: %d\n", __func__, gguf_get_version(ctx)); + fprintf(stdout, "%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx)); + fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx)); + + // kv + { + const int n_kv = gguf_get_n_kv(ctx); + + fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv); + + for (int i = 0; i < n_kv; ++i) { + const char * key = gguf_get_key(ctx, i); + + fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key); + } + } + + // tensor info + { + const int n_tensors = gguf_get_n_tensors(ctx); + + fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); + + for (int i = 0; i < n_tensors; ++i) { + const char * name = gguf_get_tensor_name(ctx, i); + const size_t offset = gguf_get_tensor_offset(ctx, i); + + fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); + } + } + + // data + { + const int n_tensors = gguf_get_n_tensors(ctx); + + for (int i = 0; i < n_tensors; ++i) { + fprintf(stdout, "%s: reading tensor %d data\n", __func__, i); + + const std::string name = "tensor_" + to_string(i); + + struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str()); + + fprintf(stdout, "%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", + __func__, i, cur->n_dims, cur->name, cur->data); + + // check data + { + const float * data = (const float *) cur->data; + for (int j = 0; j < ggml_nelements(cur); ++j) { + if (data[j] != 100 + i) { + fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]); + return false; + } + } + } + } + } + + fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data)); + + ggml_free(ctx_data); + gguf_free(ctx); + + return true; +} + +// read just the tensor info and mmap the data in user code +bool gguf_ex_read_2(const std::string & fname) { + struct ggml_context * ctx_data = NULL; + + struct gguf_init_params params = { + /*.no_alloc = */ true, + /*.ctx = */ &ctx_data, + }; + + struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); + + // TODO: mmap based on tensor infos + + fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data)); + + ggml_free(ctx_data); + gguf_free(ctx); + return true; } @@ -20,14 +331,16 @@ int main(int argc, char ** argv) { } const std::string fname(argv[1]); - const std::string mode(argv[2]); + const std::string mode (argv[2]); GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w"); if (mode == "w") { - GGML_ASSERT(gguf_write(fname) && "failed to write gguf file"); + GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); } else if (mode == "r") { - GGML_ASSERT(gguf_read(fname) && "failed to read gguf file"); + GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); + GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); + GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); } return 0; diff --git a/ggml.c b/ggml.c index e68e91e18..5736c800e 100644 --- a/ggml.c +++ b/ggml.c @@ -18364,7 +18364,7 @@ struct gguf_tensor_info { enum ggml_type type; - uint64_t offset; // offset from beginning of file, must be a multiple of `ALIGNMENT` + uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT` }; struct gguf_context { @@ -18385,9 +18385,7 @@ static bool gguf_fread_el(void * dst, size_t size, FILE * file, size_t * offset) return n == size; } -static bool gguf_fread_str(void * dst, FILE * file, size_t * offset) { - struct gguf_str * p = (struct gguf_str *) dst; - +static bool gguf_fread_str(struct gguf_str * p, FILE * file, size_t * offset) { p->n = 0; p->data = NULL; @@ -18395,14 +18393,12 @@ static bool gguf_fread_str(void * dst, FILE * file, size_t * offset) { // TODO: how to avoid mallocs for strings? ok = ok && gguf_fread_el(&p->n, sizeof(p->n), file, offset); p->data = calloc(p->n + 1, 1); - ok = ok && gguf_fread_el(&p->data, p->n, file, offset); + ok = ok && gguf_fread_el( p->data, p->n, file, offset); return ok; } -struct gguf_context * gguf_init(const char * fname, struct gguf_init_params params) { - GGML_ASSERT(!params.load || params.malloc || params.ctx != NULL); - +struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { FILE * file = fopen(fname, "rb"); if (!file) { return NULL; @@ -18446,10 +18442,14 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { struct gguf_kv * kv = &ctx->header.kv[i]; + //fprintf(stderr, "%s: reading kv %d\n", __func__, i); + ok = ok && gguf_fread_str(&kv->key, file, &offset); //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset); ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset); + //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data); + switch (kv->type) { case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break; case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break; @@ -18461,9 +18461,13 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break; case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break; case GGUF_TYPE_ARRAY: - GGML_ASSERT("gguf: array type not implemented"); - break; - }; + GGML_ASSERT("gguf: array type not implemented"); + break; + }; + + if (!ok) { + break; + } } if (!ok) { @@ -18478,12 +18482,14 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { struct gguf_tensor_info * info = &ctx->infos[i]; - memset(info->ne, 1, sizeof(info->ne)); + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + info->ne[j] = 1; + } ok = ok && gguf_fread_str(&info->name, file, &offset); ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset); for (uint32_t j = 0; j < info->n_dims; ++j) { - ok = ok && gguf_fread_el (&info->ne[j], sizeof(info->ne[j]), file, &offset); + ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset); } //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset); ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset); @@ -18536,28 +18542,30 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para ctx->size_data += GGML_PAD(size_cur, ctx->alignment); } + // load the tensor data // TODO: simplify - if (params.load) { - if (params.malloc) { - ctx->data = GGML_ALIGNED_MALLOC(ctx->size_data); - fseek(file, ctx->offset, SEEK_SET); - ok = ok && gguf_fread_el(ctx->data, ctx->size_data, file, &offset); - } else { - const size_t mem_size = - ctx->header.n_tensors*ggml_tensor_overhead() + 1 + - ctx->size_data; + if (params.ctx != NULL) { + const size_t mem_size = + params.no_alloc ? + (ctx->header.n_tensors + 1)*ggml_tensor_overhead() : + (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data; - struct ggml_init_params pdata = { - .mem_size = mem_size, - .mem_buffer = NULL, - .no_alloc = false, - }; + struct ggml_init_params pdata = { + .mem_size = mem_size, + .mem_buffer = NULL, + .no_alloc = params.no_alloc, + }; - *params.ctx = ggml_init(pdata); + *params.ctx = ggml_init(pdata); - struct ggml_context * ctx_data = *params.ctx; + struct ggml_context * ctx_data = *params.ctx; - struct ggml_tensor * data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size_data); + struct ggml_tensor * data = NULL; + + if (params.no_alloc == false) { + data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size_data); + + ok = ok && data != NULL; // read the tensor data ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset); @@ -18571,39 +18579,44 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para } ctx->data = data->data; + } - // create the tensors - ggml_set_no_alloc(ctx_data, true); + ggml_set_no_alloc(ctx_data, true); - for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - const int64_t ne[GGML_MAX_DIMS] = { - ctx->infos[i].ne[0], - ctx->infos[i].ne[1], - ctx->infos[i].ne[2], - ctx->infos[i].ne[3], - }; + // create the tensors + for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { + const int64_t ne[GGML_MAX_DIMS] = { + ctx->infos[i].ne[0], + ctx->infos[i].ne[1], + ctx->infos[i].ne[2], + ctx->infos[i].ne[3], + }; - struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne); + struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne); - ok = ok && cur != NULL; + ok = ok && cur != NULL; - if (!ok) { - break; - } - - cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; - } + ggml_set_name(cur, ctx->infos[i].name.data); if (!ok) { - fprintf(stderr, "%s: failed to create tensors\n", __func__); - fclose(file); - ggml_free(ctx_data); - gguf_free(ctx); - return NULL; + break; } - ggml_set_no_alloc(ctx_data, false); + if (params.no_alloc == false) { + //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file + cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data + } } + + if (!ok) { + fprintf(stderr, "%s: failed to create tensors\n", __func__); + fclose(file); + ggml_free(ctx_data); + gguf_free(ctx); + return NULL; + } + + ggml_set_no_alloc(ctx_data, params.no_alloc); } if (!ok) { diff --git a/ggml.h b/ggml.h index 75a41a28f..e0abbbfdd 100644 --- a/ggml.h +++ b/ggml.h @@ -1622,10 +1622,9 @@ extern "C" { struct gguf_context; struct gguf_init_params { - bool load; // load the tensor data - bool malloc; // if false, create a ggml_context and allocate the tensor data in it - // if true, use malloc to allocate the tensor data instead + bool no_alloc; + // if not NULL, create a ggml_context and allocate the tensor data in it struct ggml_context ** ctx; };