Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-24 13:28:50 +01:00)
finetune : keep allocs alive until all allocations are done (#4486)
commit 45668633fd
parent 0ffc92d2d2
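The hunks below stop freeing the input allocator right after the input tensors are placed in it: the shared ggml_allocr * alloc pointer is removed, the inputs get their own allocator alloc_inps, and that allocator is only released once the compute tensors have been allocated as well. What follows is a rough, self-contained illustration of that lifetime rule, using a toy bump allocator instead of the real ggml_allocr API; every name in it (BumpAllocator, Tensor, the buffer sizes) is invented for the sketch and is not part of ggml.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Toy stand-in for a tensor: a pointer into an arena plus an element count.
struct Tensor {
    float * data = nullptr;
    size_t  n    = 0;
};

// Toy bump allocator over a caller-owned buffer, loosely analogous to a
// ggml_allocr created over mem_input_data / mem_compute_data in the diff.
struct BumpAllocator {
    uint8_t * base;
    size_t    size;
    size_t    used = 0;

    BumpAllocator(void * buffer, size_t buffer_size)
        : base(static_cast<uint8_t *>(buffer)), size(buffer_size) {}

    void alloc(Tensor & t, size_t n_elems) {
        const size_t bytes = n_elems * sizeof(float);
        assert(used + bytes <= size && "arena too small");
        t.data = reinterpret_cast<float *>(base + used);
        t.n    = n_elems;
        used  += bytes;
    }
};

int main() {
    std::vector<uint8_t> mem_input(1024);    // analogous to mem_input_data
    std::vector<uint8_t> mem_compute(1024);  // analogous to mem_compute_data

    // Place the input tensors in their own arena and keep that allocator
    // (and its backing buffer) alive; the old code freed it at this point.
    BumpAllocator alloc_inps(mem_input.data(), mem_input.size());
    Tensor tokens_input, target_probs;
    alloc_inps.alloc(tokens_input, 8);
    alloc_inps.alloc(target_probs, 8);
    for (size_t i = 0; i < tokens_input.n; ++i) {
        tokens_input.data[i] = float(i);
    }

    // A second allocator places the compute tensors in a separate arena while
    // the inputs are still valid; only after all allocations are done (end of
    // this scope) is it safe to let either allocator go.
    {
        BumpAllocator alloc(mem_compute.data(), mem_compute.size());
        Tensor activations;
        alloc.alloc(activations, tokens_input.n);
        std::memcpy(activations.data, tokens_input.data, tokens_input.n * sizeof(float));
        std::printf("activations[3] = %g\n", double(activations.data[3]));
    }

    return 0;
}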
@@ -1620,8 +1620,6 @@ int main(int argc, char ** argv) {
     opt->params.adam.gclip = params.common.adam_gclip;
     opt->params.adam.eps_f = params.common.adam_eps_f;

-    ggml_allocr * alloc = NULL;
-
     printf("%s: init model\n", __func__);
     bool existed = load_checkpoint_lora_file(params.common.fn_checkpoint_in, &model, &lora, train);
@@ -1725,10 +1723,9 @@ int main(int argc, char ** argv) {

     // allocate input tensors
     mem_input_data.resize(max_input_size);
-    alloc = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
-    ggml_allocr_alloc(alloc, tokens_input);
-    ggml_allocr_alloc(alloc, target_probs);
-    ggml_allocr_free(alloc);
+    ggml_allocr_t alloc_inps = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
+    ggml_allocr_alloc(alloc_inps, tokens_input);
+    ggml_allocr_alloc(alloc_inps, target_probs);

     // context for compute tensors without their data
     const size_t estimated_compute_size_wo_data = (
@@ -1755,7 +1752,7 @@ int main(int argc, char ** argv) {
     // find best evaluation order
     for (unsigned order = 0; order < (unsigned) GGML_CGRAPH_EVAL_ORDER_COUNT; ++order) {
         ctx_compute = ggml_init(ctx_compute_params);
-        alloc = ggml_allocr_new_measure(tensor_alignment);
+        ggml_allocr_t alloc = ggml_allocr_new_measure(tensor_alignment);
         gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
         gf->order = (enum ggml_cgraph_eval_order) order;
         gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
@@ -1788,7 +1785,7 @@ int main(int argc, char ** argv) {
     // allocate compute tensors
     mem_compute_data.resize(max_compute_size);
     ctx_compute = ggml_init(ctx_compute_params);
-    alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
+    ggml_allocr_t alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
     gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
     gf->order = best_order;
     gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
@@ -1804,6 +1801,8 @@ int main(int argc, char ** argv) {
         params.common.use_checkpointing
     );
     ggml_allocr_free(alloc);
+    ggml_allocr_free(alloc_inps);
+

     // tokenize data
     std::vector<llama_token> train_tokens;
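Taken together: the long-lived ggml_allocr * alloc pointer at the top of main is gone, tokens_input and target_probs are placed through a dedicated alloc_inps allocator that is no longer freed immediately, the measure and compute allocators become locals, and alloc_inps is released only after the compute tensors have been allocated, which is what the commit title means by keeping the allocs alive until all allocations are done.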