mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-19 08:20:10 +01:00
gguf : do not support passing existing ggml_context to gguf_init
This commit is contained in:
parent
860c9c63ce
commit
cb871fa022
51
ggml.c
51
ggml.c
@ -18388,6 +18388,8 @@ static bool gguf_fread_str(void * dst, FILE * file, size_t * offset) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct gguf_context * gguf_init(const char * fname, struct gguf_init_params params) {
|
struct gguf_context * gguf_init(const char * fname, struct gguf_init_params params) {
|
||||||
|
GGML_ASSERT(!params.load || params.malloc || params.ctx != NULL);
|
||||||
|
|
||||||
FILE * file = fopen(fname, "rb");
|
FILE * file = fopen(fname, "rb");
|
||||||
if (!file) {
|
if (!file) {
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -18518,8 +18520,7 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
|
|||||||
|
|
||||||
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
|
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
|
||||||
|
|
||||||
// TODO: pad size_cur to alignment
|
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
|
||||||
ctx->size_data += size_cur;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: simplify
|
// TODO: simplify
|
||||||
@ -18528,28 +18529,18 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
|
|||||||
ctx->data = GGML_ALIGNED_MALLOC(ctx->size_data);
|
ctx->data = GGML_ALIGNED_MALLOC(ctx->size_data);
|
||||||
fseek(file, ctx->offset, SEEK_SET);
|
fseek(file, ctx->offset, SEEK_SET);
|
||||||
ok = ok && gguf_fread_el(ctx->data, ctx->size_data, file, &offset);
|
ok = ok && gguf_fread_el(ctx->data, ctx->size_data, file, &offset);
|
||||||
} else if (params.ctx != NULL) {
|
} else {
|
||||||
bool ctx_new = false;
|
const size_t mem_size =
|
||||||
bool ctx_no_alloc = false;
|
ctx->header.n_tensors*ggml_tensor_overhead() + 1 +
|
||||||
|
ctx->size_data;
|
||||||
|
|
||||||
if (*params.ctx == NULL) {
|
struct ggml_init_params pdata = {
|
||||||
const size_t mem_size =
|
.mem_size = mem_size,
|
||||||
ctx->header.n_tensors*ggml_tensor_overhead() + 1 +
|
.mem_buffer = NULL,
|
||||||
ctx->size_data;
|
.no_alloc = false,
|
||||||
|
};
|
||||||
|
|
||||||
struct ggml_init_params pdata = {
|
*params.ctx = ggml_init(pdata);
|
||||||
.mem_size = mem_size,
|
|
||||||
.mem_buffer = NULL,
|
|
||||||
.no_alloc = false,
|
|
||||||
};
|
|
||||||
|
|
||||||
*params.ctx = ggml_init(pdata);
|
|
||||||
|
|
||||||
ctx_new = true;
|
|
||||||
} else {
|
|
||||||
ctx_no_alloc = ggml_get_no_alloc(*params.ctx);
|
|
||||||
ggml_set_no_alloc(*params.ctx, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_context * ctx_data = *params.ctx;
|
struct ggml_context * ctx_data = *params.ctx;
|
||||||
|
|
||||||
@ -18561,11 +18552,7 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
|
|||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
if (ctx_new) {
|
ggml_free(ctx_data);
|
||||||
ggml_free(ctx_data);
|
|
||||||
} else {
|
|
||||||
ggml_set_no_alloc(ctx_data, ctx_no_alloc);
|
|
||||||
}
|
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -18597,18 +18584,12 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
|
|||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to create tensors\n", __func__);
|
fprintf(stderr, "%s: failed to create tensors\n", __func__);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
if (ctx_new) {
|
ggml_free(ctx_data);
|
||||||
ggml_free(ctx_data);
|
|
||||||
} else {
|
|
||||||
ggml_set_no_alloc(ctx_data, ctx_no_alloc);
|
|
||||||
}
|
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_set_no_alloc(ctx_data, ctx_no_alloc);
|
ggml_set_no_alloc(ctx_data, false);
|
||||||
} else {
|
|
||||||
GGML_ASSERT("gguf: invalid params - load requires malloc or ctx");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
5
ggml.h
5
ggml.h
@ -1636,9 +1636,8 @@ extern "C" {
|
|||||||
|
|
||||||
struct gguf_init_params {
|
struct gguf_init_params {
|
||||||
bool load; // load the tensor data
|
bool load; // load the tensor data
|
||||||
bool malloc; // if false, use the provided ggml_context to allocate the tensor data
|
bool malloc; // if false, create a ggml_context and allocate the tensor data in it
|
||||||
// it no ggml_context is provided, it will be created
|
// if true, use malloc to allocate the tensor data instead
|
||||||
// if true, use malloc to allocate the tensor data
|
|
||||||
|
|
||||||
struct ggml_context ** ctx;
|
struct ggml_context ** ctx;
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user