mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-19 08:20:10 +01:00
gguf : add array support
This commit is contained in:
parent
d89533dff6
commit
d2b6ca13ad
@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
|
||||
void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
|
||||
gguf_ex_write_str(fout, key);
|
||||
fout.write((const char *) &type, sizeof(type));
|
||||
fout.write((const char *) &val, sizeof(val));
|
||||
@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu
|
||||
}
|
||||
|
||||
template<>
|
||||
void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
|
||||
void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
|
||||
gguf_ex_write_str(fout, key);
|
||||
fout.write((const char *) &type, sizeof(type));
|
||||
|
||||
const int32_t n = val.size();
|
||||
fout.write((const char *) &n, sizeof(n));
|
||||
fout.write(val.c_str(), n);
|
||||
|
||||
fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
|
||||
gguf_ex_write_str(fout, key);
|
||||
{
|
||||
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
|
||||
fout.write((const char *) &tarr, sizeof(tarr));
|
||||
}
|
||||
|
||||
const int32_t n = val.size();
|
||||
fout.write((const char *) &type, sizeof(type));
|
||||
fout.write((const char *) &n, sizeof(n));
|
||||
fout.write((const char *) val.data(), n * sizeof(T));
|
||||
|
||||
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
|
||||
for (int i = 0; i < n; ++i) {
|
||||
fprintf(stdout, "%s", to_string(val[i]).c_str());
|
||||
if (i < n - 1) {
|
||||
fprintf(stdout, ", ");
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "]\n");
|
||||
}
|
||||
|
||||
template<>
|
||||
void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
|
||||
gguf_ex_write_str(fout, key);
|
||||
{
|
||||
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
|
||||
fout.write((const char *) &tarr, sizeof(tarr));
|
||||
}
|
||||
|
||||
const int32_t n = val.size();
|
||||
fout.write((const char *) &type, sizeof(type));
|
||||
fout.write((const char *) &n, sizeof(n));
|
||||
for (int i = 0; i < n; ++i) {
|
||||
const int32_t nstr = val[i].size();
|
||||
fout.write((const char *) &nstr, sizeof(nstr));
|
||||
fout.write(val[i].c_str(), nstr);
|
||||
}
|
||||
|
||||
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
|
||||
for (int i = 0; i < n; ++i) {
|
||||
fprintf(stdout, "%s", val[i].c_str());
|
||||
if (i < n - 1) {
|
||||
fprintf(stdout, ", ");
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "]\n");
|
||||
}
|
||||
|
||||
bool gguf_ex_write(const std::string & fname) {
|
||||
@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) {
|
||||
fout.write((const char *) &version, sizeof(version));
|
||||
}
|
||||
|
||||
// NOTE: these have to match the output below!
|
||||
const int n_tensors = 10;
|
||||
const int n_kv = 9;
|
||||
const int n_kv = 12;
|
||||
|
||||
fout.write((const char*) &n_tensors, sizeof(n_tensors));
|
||||
fout.write((const char*) &n_kv, sizeof(n_kv));
|
||||
@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) {
|
||||
|
||||
// kv data
|
||||
{
|
||||
gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
|
||||
gguf_ex_write_param< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
|
||||
gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
|
||||
gguf_ex_write_param< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
|
||||
gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
|
||||
gguf_ex_write_param< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
|
||||
gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
|
||||
gguf_ex_write_val< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
|
||||
gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
|
||||
gguf_ex_write_val< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
|
||||
gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
|
||||
gguf_ex_write_val< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
|
||||
|
||||
gguf_ex_write_param<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
|
||||
gguf_ex_write_param<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
|
||||
gguf_ex_write_val<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
|
||||
gguf_ex_write_val<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
|
||||
|
||||
gguf_ex_write_param<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
|
||||
gguf_ex_write_val<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
|
||||
|
||||
gguf_ex_write_arr<int16_t> (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16, { 1, 2, 3, 4, });
|
||||
gguf_ex_write_arr<float> (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
|
||||
gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" });
|
||||
}
|
||||
|
||||
uint64_t offset_tensor = 0;
|
||||
@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) {
|
||||
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
||||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
const char * name = gguf_get_tensor_name (ctx, i);
|
||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||
|
||||
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||
}
|
||||
}
|
||||
|
||||
gguf_free(ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) {
|
||||
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
||||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
const char * name = gguf_get_tensor_name (ctx, i);
|
||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||
|
||||
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||
|
64
ggml.c
64
ggml.c
@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
|
||||
};
|
||||
static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
|
||||
|
||||
|
||||
static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
|
||||
[GGML_TYPE_F32] = "f32",
|
||||
[GGML_TYPE_F16] = "f16",
|
||||
@ -18302,7 +18301,19 @@ struct gguf_str {
|
||||
char * data;
|
||||
};
|
||||
|
||||
union gguf_value;
|
||||
static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
|
||||
[GGUF_TYPE_UINT8] = sizeof(uint8_t),
|
||||
[GGUF_TYPE_INT8] = sizeof(int8_t),
|
||||
[GGUF_TYPE_UINT16] = sizeof(uint16_t),
|
||||
[GGUF_TYPE_INT16] = sizeof(int16_t),
|
||||
[GGUF_TYPE_UINT32] = sizeof(uint32_t),
|
||||
[GGUF_TYPE_INT32] = sizeof(int32_t),
|
||||
[GGUF_TYPE_FLOAT32] = sizeof(float),
|
||||
[GGUF_TYPE_BOOL] = sizeof(bool),
|
||||
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
|
||||
[GGUF_TYPE_ARRAY] = 0, // undefined
|
||||
};
|
||||
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
|
||||
|
||||
union gguf_value {
|
||||
uint8_t uint8;
|
||||
@ -18320,7 +18331,7 @@ union gguf_value {
|
||||
enum gguf_type type;
|
||||
|
||||
uint32_t n;
|
||||
union gguf_value * arr;
|
||||
void * data;
|
||||
} arr;
|
||||
};
|
||||
|
||||
@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
|
||||
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
GGML_ASSERT("gguf: array type not implemented");
|
||||
break;
|
||||
{
|
||||
ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
|
||||
ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
|
||||
|
||||
switch (kv->value.arr.type) {
|
||||
case GGUF_TYPE_UINT8:
|
||||
case GGUF_TYPE_INT8:
|
||||
case GGUF_TYPE_UINT16:
|
||||
case GGUF_TYPE_INT16:
|
||||
case GGUF_TYPE_UINT32:
|
||||
case GGUF_TYPE_INT32:
|
||||
case GGUF_TYPE_FLOAT32:
|
||||
case GGUF_TYPE_BOOL:
|
||||
{
|
||||
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||
ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
|
||||
} break;
|
||||
case GGUF_TYPE_STRING:
|
||||
{
|
||||
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
|
||||
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||
};
|
||||
} break;
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||
};
|
||||
|
||||
if (!ok) {
|
||||
@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) {
|
||||
free(kv->value.str.data);
|
||||
}
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_ARRAY) {
|
||||
if (kv->value.arr.data) {
|
||||
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
||||
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||
if (str->data) {
|
||||
free(str->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
free(kv->value.arr.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GGML_ALIGNED_FREE(ctx->header.kv);
|
||||
|
4
ggml.h
4
ggml.h
@ -1631,6 +1631,7 @@ extern "C" {
|
||||
GGUF_TYPE_BOOL = 7,
|
||||
GGUF_TYPE_STRING = 8,
|
||||
GGUF_TYPE_ARRAY = 9,
|
||||
GGUF_TYPE_COUNT, // marks the end of the enum
|
||||
};
|
||||
|
||||
struct gguf_context;
|
||||
@ -1664,7 +1665,8 @@ extern "C" {
|
||||
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
||||
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
||||
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
||||
// TODO: arr
|
||||
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
||||
GGML_API void gguf_get_arr_data(struct gguf_context * ctx, int i, void * data);
|
||||
|
||||
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
||||
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
||||
|
Loading…
Reference in New Issue
Block a user