mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-22 09:39:08 +01:00
gguf : add comments
This commit is contained in:
parent
5628ec7163
commit
d8491fc7e3
44
ggml.c
44
ggml.c
@ -18407,19 +18407,25 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
// offset from start of file
|
// offset from start of file
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
// check the magic before making allocations
|
|
||||||
uint32_t magic = 0;
|
uint32_t magic = 0;
|
||||||
|
|
||||||
|
// check the magic before making allocations
|
||||||
|
{
|
||||||
gguf_fread_el(&magic, sizeof(magic), file, &offset);
|
gguf_fread_el(&magic, sizeof(magic), file, &offset);
|
||||||
|
|
||||||
if (magic != GGUF_MAGIC) {
|
if (magic != GGUF_MAGIC) {
|
||||||
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
|
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
|
|
||||||
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
||||||
|
|
||||||
|
// read the header
|
||||||
|
{
|
||||||
ctx->header.magic = magic;
|
ctx->header.magic = magic;
|
||||||
ctx->header.kv = NULL;
|
ctx->header.kv = NULL;
|
||||||
|
|
||||||
@ -18436,7 +18442,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the kv pairs
|
||||||
|
{
|
||||||
ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
|
ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
|
||||||
|
|
||||||
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
|
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||||
@ -18476,7 +18485,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the tensor infos
|
||||||
|
{
|
||||||
ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
ctx->infos = GGML_ALIGNED_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
||||||
|
|
||||||
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
||||||
@ -18502,11 +18514,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
|
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
|
||||||
|
|
||||||
// TODO: determine new alignment from kv if available
|
// TODO: determine new alignment from kv if available
|
||||||
|
|
||||||
|
// we require the data section to be aligned, so take into account any padding
|
||||||
{
|
{
|
||||||
const size_t offset_pad = offset % ctx->alignment;
|
const size_t offset_pad = offset % ctx->alignment;
|
||||||
|
|
||||||
@ -18516,10 +18530,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// store the current file offset - this is where the data section starts
|
||||||
ctx->offset = offset;
|
ctx->offset = offset;
|
||||||
|
|
||||||
ctx->size_data = 0;
|
// compute the total size of the data section, taking into account the alignment
|
||||||
|
{
|
||||||
|
|
||||||
|
ctx->size_data = 0;
|
||||||
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
||||||
struct gguf_tensor_info * info = &ctx->infos[i];
|
struct gguf_tensor_info * info = &ctx->infos[i];
|
||||||
|
|
||||||
@ -18541,13 +18558,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
|
|
||||||
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
|
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// load the tensor data
|
// load the tensor data only if requested
|
||||||
// TODO: simplify
|
|
||||||
if (params.ctx != NULL) {
|
if (params.ctx != NULL) {
|
||||||
|
// if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
|
||||||
|
// otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
|
||||||
|
// the ggml_tensor structs to the appropriate locations in the binary blob
|
||||||
|
|
||||||
|
// compute the exact size needed for the new ggml_context
|
||||||
const size_t mem_size =
|
const size_t mem_size =
|
||||||
params.no_alloc ?
|
params.no_alloc ?
|
||||||
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() :
|
(ctx->header.n_tensors )*ggml_tensor_overhead() :
|
||||||
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
|
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
|
||||||
|
|
||||||
struct ggml_init_params pdata = {
|
struct ggml_init_params pdata = {
|
||||||
@ -18567,7 +18589,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
|
|
||||||
ok = ok && data != NULL;
|
ok = ok && data != NULL;
|
||||||
|
|
||||||
// read the tensor data
|
// read the binary blob with the tensor data
|
||||||
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
|
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
@ -18602,6 +18624,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// point the data member to the appropriate location in the binary blob using the tensor infos
|
||||||
if (params.no_alloc == false) {
|
if (params.no_alloc == false) {
|
||||||
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
|
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
|
||||||
cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
|
cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
|
||||||
@ -18609,7 +18632,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to create tensors\n", __func__);
|
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
ggml_free(ctx_data);
|
ggml_free(ctx_data);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
@ -18619,13 +18642,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ok) {
|
|
||||||
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
|
||||||
fclose(file);
|
|
||||||
gguf_free(ctx);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user