mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-03 17:51:09 +01:00
gguf : add array support
This commit is contained in:
parent
d89533dff6
commit
d2b6ca13ad
@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
|
void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
|
||||||
gguf_ex_write_str(fout, key);
|
gguf_ex_write_str(fout, key);
|
||||||
fout.write((const char *) &type, sizeof(type));
|
fout.write((const char *) &type, sizeof(type));
|
||||||
fout.write((const char *) &val, sizeof(val));
|
fout.write((const char *) &val, sizeof(val));
|
||||||
@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
|
void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
|
||||||
gguf_ex_write_str(fout, key);
|
gguf_ex_write_str(fout, key);
|
||||||
fout.write((const char *) &type, sizeof(type));
|
fout.write((const char *) &type, sizeof(type));
|
||||||
|
|
||||||
const int32_t n = val.size();
|
const int32_t n = val.size();
|
||||||
fout.write((const char *) &n, sizeof(n));
|
fout.write((const char *) &n, sizeof(n));
|
||||||
fout.write(val.c_str(), n);
|
fout.write(val.c_str(), n);
|
||||||
|
|
||||||
|
fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
|
||||||
|
gguf_ex_write_str(fout, key);
|
||||||
|
{
|
||||||
|
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
|
||||||
|
fout.write((const char *) &tarr, sizeof(tarr));
|
||||||
|
}
|
||||||
|
|
||||||
|
const int32_t n = val.size();
|
||||||
|
fout.write((const char *) &type, sizeof(type));
|
||||||
|
fout.write((const char *) &n, sizeof(n));
|
||||||
|
fout.write((const char *) val.data(), n * sizeof(T));
|
||||||
|
|
||||||
|
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
fprintf(stdout, "%s", to_string(val[i]).c_str());
|
||||||
|
if (i < n - 1) {
|
||||||
|
fprintf(stdout, ", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stdout, "]\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
|
||||||
|
gguf_ex_write_str(fout, key);
|
||||||
|
{
|
||||||
|
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
|
||||||
|
fout.write((const char *) &tarr, sizeof(tarr));
|
||||||
|
}
|
||||||
|
|
||||||
|
const int32_t n = val.size();
|
||||||
|
fout.write((const char *) &type, sizeof(type));
|
||||||
|
fout.write((const char *) &n, sizeof(n));
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
const int32_t nstr = val[i].size();
|
||||||
|
fout.write((const char *) &nstr, sizeof(nstr));
|
||||||
|
fout.write(val[i].c_str(), nstr);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
fprintf(stdout, "%s", val[i].c_str());
|
||||||
|
if (i < n - 1) {
|
||||||
|
fprintf(stdout, ", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stdout, "]\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gguf_ex_write(const std::string & fname) {
|
bool gguf_ex_write(const std::string & fname) {
|
||||||
@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) {
|
|||||||
fout.write((const char *) &version, sizeof(version));
|
fout.write((const char *) &version, sizeof(version));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: these have to match the output below!
|
||||||
const int n_tensors = 10;
|
const int n_tensors = 10;
|
||||||
const int n_kv = 9;
|
const int n_kv = 12;
|
||||||
|
|
||||||
fout.write((const char*) &n_tensors, sizeof(n_tensors));
|
fout.write((const char*) &n_tensors, sizeof(n_tensors));
|
||||||
fout.write((const char*) &n_kv, sizeof(n_kv));
|
fout.write((const char*) &n_kv, sizeof(n_kv));
|
||||||
@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) {
|
|||||||
|
|
||||||
// kv data
|
// kv data
|
||||||
{
|
{
|
||||||
gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
|
gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
|
||||||
gguf_ex_write_param< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
|
gguf_ex_write_val< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
|
||||||
gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
|
gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
|
||||||
gguf_ex_write_param< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
|
gguf_ex_write_val< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
|
||||||
gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
|
gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
|
||||||
gguf_ex_write_param< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
|
gguf_ex_write_val< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
|
||||||
|
|
||||||
gguf_ex_write_param<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
|
gguf_ex_write_val<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
|
||||||
gguf_ex_write_param<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
|
gguf_ex_write_val<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
|
||||||
|
|
||||||
gguf_ex_write_param<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
|
gguf_ex_write_val<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
|
||||||
|
|
||||||
|
gguf_ex_write_arr<int16_t> (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16, { 1, 2, 3, 4, });
|
||||||
|
gguf_ex_write_arr<float> (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
|
||||||
|
gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" });
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t offset_tensor = 0;
|
uint64_t offset_tensor = 0;
|
||||||
@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) {
|
|||||||
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
||||||
|
|
||||||
for (int i = 0; i < n_tensors; ++i) {
|
for (int i = 0; i < n_tensors; ++i) {
|
||||||
const char * name = gguf_get_tensor_name(ctx, i);
|
const char * name = gguf_get_tensor_name (ctx, i);
|
||||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||||
|
|
||||||
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gguf_free(ctx);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) {
|
|||||||
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
||||||
|
|
||||||
for (int i = 0; i < n_tensors; ++i) {
|
for (int i = 0; i < n_tensors; ++i) {
|
||||||
const char * name = gguf_get_tensor_name(ctx, i);
|
const char * name = gguf_get_tensor_name (ctx, i);
|
||||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||||
|
|
||||||
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||||
|
64
ggml.c
64
ggml.c
@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
|
|||||||
};
|
};
|
||||||
static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
|
static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
|
||||||
|
|
||||||
|
|
||||||
static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
|
static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_F32] = "f32",
|
[GGML_TYPE_F32] = "f32",
|
||||||
[GGML_TYPE_F16] = "f16",
|
[GGML_TYPE_F16] = "f16",
|
||||||
@ -18302,7 +18301,19 @@ struct gguf_str {
|
|||||||
char * data;
|
char * data;
|
||||||
};
|
};
|
||||||
|
|
||||||
union gguf_value;
|
static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
|
||||||
|
[GGUF_TYPE_UINT8] = sizeof(uint8_t),
|
||||||
|
[GGUF_TYPE_INT8] = sizeof(int8_t),
|
||||||
|
[GGUF_TYPE_UINT16] = sizeof(uint16_t),
|
||||||
|
[GGUF_TYPE_INT16] = sizeof(int16_t),
|
||||||
|
[GGUF_TYPE_UINT32] = sizeof(uint32_t),
|
||||||
|
[GGUF_TYPE_INT32] = sizeof(int32_t),
|
||||||
|
[GGUF_TYPE_FLOAT32] = sizeof(float),
|
||||||
|
[GGUF_TYPE_BOOL] = sizeof(bool),
|
||||||
|
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
|
||||||
|
[GGUF_TYPE_ARRAY] = 0, // undefined
|
||||||
|
};
|
||||||
|
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
|
||||||
|
|
||||||
union gguf_value {
|
union gguf_value {
|
||||||
uint8_t uint8;
|
uint8_t uint8;
|
||||||
@ -18320,7 +18331,7 @@ union gguf_value {
|
|||||||
enum gguf_type type;
|
enum gguf_type type;
|
||||||
|
|
||||||
uint32_t n;
|
uint32_t n;
|
||||||
union gguf_value * arr;
|
void * data;
|
||||||
} arr;
|
} arr;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
|
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
|
||||||
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
|
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
GGML_ASSERT("gguf: array type not implemented");
|
{
|
||||||
break;
|
ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
|
||||||
|
ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
|
||||||
|
|
||||||
|
switch (kv->value.arr.type) {
|
||||||
|
case GGUF_TYPE_UINT8:
|
||||||
|
case GGUF_TYPE_INT8:
|
||||||
|
case GGUF_TYPE_UINT16:
|
||||||
|
case GGUF_TYPE_INT16:
|
||||||
|
case GGUF_TYPE_UINT32:
|
||||||
|
case GGUF_TYPE_INT32:
|
||||||
|
case GGUF_TYPE_FLOAT32:
|
||||||
|
case GGUF_TYPE_BOOL:
|
||||||
|
{
|
||||||
|
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||||
|
ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
|
||||||
|
} break;
|
||||||
|
case GGUF_TYPE_STRING:
|
||||||
|
{
|
||||||
|
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
|
||||||
|
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
||||||
|
ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGUF_TYPE_ARRAY:
|
||||||
|
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||||
|
};
|
||||||
|
} break;
|
||||||
|
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) {
|
|||||||
free(kv->value.str.data);
|
free(kv->value.str.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (kv->type == GGUF_TYPE_ARRAY) {
|
||||||
|
if (kv->value.arr.data) {
|
||||||
|
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
||||||
|
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
||||||
|
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||||
|
if (str->data) {
|
||||||
|
free(str->data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(kv->value.arr.data);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_ALIGNED_FREE(ctx->header.kv);
|
GGML_ALIGNED_FREE(ctx->header.kv);
|
||||||
|
4
ggml.h
4
ggml.h
@ -1631,6 +1631,7 @@ extern "C" {
|
|||||||
GGUF_TYPE_BOOL = 7,
|
GGUF_TYPE_BOOL = 7,
|
||||||
GGUF_TYPE_STRING = 8,
|
GGUF_TYPE_STRING = 8,
|
||||||
GGUF_TYPE_ARRAY = 9,
|
GGUF_TYPE_ARRAY = 9,
|
||||||
|
GGUF_TYPE_COUNT, // marks the end of the enum
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gguf_context;
|
struct gguf_context;
|
||||||
@ -1664,7 +1665,8 @@ extern "C" {
|
|||||||
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
||||||
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
||||||
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
||||||
// TODO: arr
|
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
||||||
|
GGML_API void gguf_get_arr_data(struct gguf_context * ctx, int i, void * data);
|
||||||
|
|
||||||
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
||||||
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
||||||
|
Loading…
Reference in New Issue
Block a user