Mirror of https://github.com/ggerganov/llama.cpp.git
Synced 2024-12-25 05:48:47 +01:00
finetune : rename feed-forward tensors (w1/w2/w3) (#4839)
* finetune : rename feed-forward tensors (w1/w2/w3)

  This commit renames the feed-forward tensors w1, w2 and w3 to ffn_gate,
  ffn_down and ffn_up respectively.

  The motivation for this change is to make it easier to understand the
  purpose of the tensors. This also seems to be in line with the names
  used in the llama_layer struct in llama.cpp.

  Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>

* train-text-from-scratch : rename ff tensors

  This commit renames the feed-forward tensors w1, w2 and w3 to ffn_gate,
  ffn_down and ffn_up respectively.

  The motivation for this change is to make it easier to understand the
  purpose of the tensors. This also seems to be in line with the names
  used in the llama_layer struct in llama.cpp.

  Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>

---------

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
parent cf45252a7c
commit 263978904c
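For orientation while reading the diff: the three renamed tensors form the SwiGLU feed-forward block of a LLaMA-style layer, and the new names state each tensor's role directly. The sketch below is illustrative only (the helper name build_ffn_swiglu is not part of the commit); it mirrors the t25..t29 nodes in the training graphs further down, using the same ggml calls that appear in the diff.

```cpp
#include "ggml.h"

// Illustrative sketch (not from the commit): the SwiGLU feed-forward block
// that the renamed tensors implement, written with the new names.
//   ffn_gate (was w1): n_embd -> n_ff projection, passed through SiLU
//   ffn_up   (was w3): n_embd -> n_ff projection, elementwise multiplier
//   ffn_down (was w2): n_ff   -> n_embd projection back to the residual
static struct ggml_tensor * build_ffn_swiglu(
        struct ggml_context * ctx,
        struct ggml_tensor  * ffn_gate,
        struct ggml_tensor  * ffn_down,
        struct ggml_tensor  * ffn_up,
        struct ggml_tensor  * cur) {
    struct ggml_tensor * up   = ggml_mul_mat(ctx, ffn_up,   cur); // t25 in the graphs below
    struct ggml_tensor * gate = ggml_mul_mat(ctx, ffn_gate, cur); // t26
    struct ggml_tensor * act  = ggml_silu(ctx, gate);             // t27
    struct ggml_tensor * prod = ggml_mul(ctx, act, up);           // t28
    return ggml_mul_mat(ctx, ffn_down, prod);                     // t29
}
```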
examples/finetune/README.md
@@ -80,9 +80,9 @@ The LORA rank can be configured for each model tensor type separately with these
 --rank-wk N       LORA rank for wk tensor (default 4)
 --rank-wv N       LORA rank for wv tensor (default 4)
 --rank-wo N       LORA rank for wo tensor (default 4)
---rank-w1 N       LORA rank for w1 tensor (default 4)
---rank-w2 N       LORA rank for w2 tensor (default 4)
---rank-w3 N       LORA rank for w3 tensor (default 4)
+--rank-ffn_gate N LORA rank for ffn_gate tensor (default 4)
+--rank-ffn_down N LORA rank for ffn_down tensor (default 4)
+--rank-ffn_up N   LORA rank for ffn_up tensor (default 4)
 ```

 The LORA rank of 'norm' tensors should always be 1.
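Correspondingly, an invocation that previously passed --rank-w1/--rank-w2/--rank-w3 now uses the renamed flags; the rank values below are made up for illustration:

```
./finetune ... --rank-ffn_gate 8 --rank-ffn_down 8 --rank-ffn_up 8
```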
examples/finetune/finetune.cpp
@@ -60,9 +60,9 @@ struct my_llama_layer {
     struct ggml_tensor * ffn_norm;

     // ff
-    struct ggml_tensor * w1;
-    struct ggml_tensor * w2;
-    struct ggml_tensor * w3;
+    struct ggml_tensor * ffn_gate; // w1
+    struct ggml_tensor * ffn_down; // w2
+    struct ggml_tensor * ffn_up;   // w3
 };

 struct my_llama_model {
@@ -85,9 +85,9 @@ struct my_llama_lora_hparams {
     uint32_t n_rank_wv = 4;
     uint32_t n_rank_wo = 4;
     uint32_t n_rank_ffn_norm = 1;
-    uint32_t n_rank_w1 = 4;
-    uint32_t n_rank_w2 = 4;
-    uint32_t n_rank_w3 = 4;
+    uint32_t n_rank_ffn_gate = 4;
+    uint32_t n_rank_ffn_down = 4;
+    uint32_t n_rank_ffn_up = 4;
     uint32_t n_rank_tok_embeddings = 4;
     uint32_t n_rank_norm = 1;
     uint32_t n_rank_output = 4;
@@ -117,12 +117,12 @@ struct my_llama_lora_layer {
     struct ggml_tensor * ffn_norm_b;

     // ff
-    struct ggml_tensor * w1_a;
-    struct ggml_tensor * w1_b;
-    struct ggml_tensor * w2_a;
-    struct ggml_tensor * w2_b;
-    struct ggml_tensor * w3_a;
-    struct ggml_tensor * w3_b;
+    struct ggml_tensor * ffn_gate_a;
+    struct ggml_tensor * ffn_gate_b;
+    struct ggml_tensor * ffn_down_a;
+    struct ggml_tensor * ffn_down_b;
+    struct ggml_tensor * ffn_up_a;
+    struct ggml_tensor * ffn_up_b;
 };

 struct my_llama_lora {
@@ -208,9 +208,9 @@ static void print_lora_params(struct my_llama_lora_hparams * params) {
     printf("%s: n_rank_wv : %u\n", __func__, params->n_rank_wv);
     printf("%s: n_rank_wo : %u\n", __func__, params->n_rank_wo);
     printf("%s: n_rank_ffn_norm : %u\n", __func__, params->n_rank_ffn_norm);
-    printf("%s: n_rank_w1 : %u\n", __func__, params->n_rank_w1);
-    printf("%s: n_rank_w2 : %u\n", __func__, params->n_rank_w2);
-    printf("%s: n_rank_w3 : %u\n", __func__, params->n_rank_w3);
+    printf("%s: n_rank_ffn_gate : %u\n", __func__, params->n_rank_ffn_gate);
+    printf("%s: n_rank_ffn_down : %u\n", __func__, params->n_rank_ffn_down);
+    printf("%s: n_rank_ffn_up : %u\n", __func__, params->n_rank_ffn_up);
     printf("%s: n_rank_tok_embeddings : %u\n", __func__, params->n_rank_tok_embeddings);
     printf("%s: n_rank_norm : %u\n", __func__, params->n_rank_norm);
     printf("%s: n_rank_output : %u\n", __func__, params->n_rank_output);
@@ -319,9 +319,9 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
         layer.wv = llama_get_model_tensor(input, tni(LLM_TENSOR_ATTN_V, i));
         layer.wo = llama_get_model_tensor(input, tni(LLM_TENSOR_ATTN_OUT, i));
         layer.ffn_norm = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_NORM, i));
-        layer.w1 = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_GATE, i));
-        layer.w2 = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_DOWN, i));
-        layer.w3 = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_UP, i));
+        layer.ffn_gate = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_GATE, i));
+        layer.ffn_down = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_DOWN, i));
+        layer.ffn_up = llama_get_model_tensor(input, tni(LLM_TENSOR_FFN_UP, i));

         assert_shape_1d(layer.attention_norm, hparams.n_embd);
         assert_shape_2d(layer.wq, hparams.n_embd, hparams.n_embd);
@@ -329,9 +329,9 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
         assert_shape_2d(layer.wv, hparams.n_embd, hparams.n_embd_gqa());
         assert_shape_2d(layer.wo, hparams.n_embd, hparams.n_embd);
         assert_shape_1d(layer.ffn_norm, hparams.n_embd);
-        assert_shape_2d(layer.w1, hparams.n_embd, hparams.n_ff);
-        assert_shape_2d(layer.w2, hparams.n_ff, hparams.n_embd);
-        assert_shape_2d(layer.w3, hparams.n_embd, hparams.n_ff);
+        assert_shape_2d(layer.ffn_gate, hparams.n_embd, hparams.n_ff);
+        assert_shape_2d(layer.ffn_down, hparams.n_ff, hparams.n_embd);
+        assert_shape_2d(layer.ffn_up, hparams.n_embd, hparams.n_ff);
     }
 }

@@ -362,12 +362,12 @@ static void set_param_lora(struct my_llama_lora * lora) {
         ggml_set_param(ctx, layer.wo_b);
         ggml_set_param(ctx, layer.ffn_norm_a);
         ggml_set_param(ctx, layer.ffn_norm_b);
-        ggml_set_param(ctx, layer.w1_a);
-        ggml_set_param(ctx, layer.w1_b);
-        ggml_set_param(ctx, layer.w2_a);
-        ggml_set_param(ctx, layer.w2_b);
-        ggml_set_param(ctx, layer.w3_a);
-        ggml_set_param(ctx, layer.w3_b);
+        ggml_set_param(ctx, layer.ffn_gate_a);
+        ggml_set_param(ctx, layer.ffn_gate_b);
+        ggml_set_param(ctx, layer.ffn_down_a);
+        ggml_set_param(ctx, layer.ffn_down_b);
+        ggml_set_param(ctx, layer.ffn_up_a);
+        ggml_set_param(ctx, layer.ffn_up_b);
     }
 }

@@ -435,12 +435,12 @@ static void init_lora(const struct my_llama_model * model, struct my_llama_lora
         layer.ffn_norm_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_norm, n_embd);
         layer.ffn_norm_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_norm, 1);

-        layer.w1_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_w1, n_embd);
-        layer.w1_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_w1, n_ff);
-        layer.w2_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_w2, n_ff);
-        layer.w2_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_w2, n_embd);
-        layer.w3_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_w3, n_embd);
-        layer.w3_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_w3, n_ff);
+        layer.ffn_gate_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_gate, n_embd);
+        layer.ffn_gate_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_gate, n_ff);
+        layer.ffn_down_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_down, n_ff);
+        layer.ffn_down_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_down, n_embd);
+        layer.ffn_up_a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_up, n_embd);
+        layer.ffn_up_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, lparams.n_rank_ffn_up, n_ff);

         ggml_set_name(layer.attention_norm_a, tni(LLM_TENSOR_ATTN_NORM, ".weight.lora_a", i));
         ggml_set_name(layer.attention_norm_b, tni(LLM_TENSOR_ATTN_NORM, ".weight.lora_b", i));
@@ -454,12 +454,12 @@ static void init_lora(const struct my_llama_model * model, struct my_llama_lora
         ggml_set_name(layer.wo_b, tni(LLM_TENSOR_ATTN_OUT, ".weight.lora_b", i));
         ggml_set_name(layer.ffn_norm_a, tni(LLM_TENSOR_FFN_NORM, ".weight.lora_a", i));
         ggml_set_name(layer.ffn_norm_b, tni(LLM_TENSOR_FFN_NORM, ".weight.lora_b", i));
-        ggml_set_name(layer.w1_a, tni(LLM_TENSOR_FFN_GATE, ".weight.lora_a", i));
-        ggml_set_name(layer.w1_b, tni(LLM_TENSOR_FFN_GATE, ".weight.lora_b", i));
-        ggml_set_name(layer.w2_a, tni(LLM_TENSOR_FFN_DOWN, ".weight.lora_a", i));
-        ggml_set_name(layer.w2_b, tni(LLM_TENSOR_FFN_DOWN, ".weight.lora_b", i));
-        ggml_set_name(layer.w3_a, tni(LLM_TENSOR_FFN_UP, ".weight.lora_a", i));
-        ggml_set_name(layer.w3_b, tni(LLM_TENSOR_FFN_UP, ".weight.lora_b", i));
+        ggml_set_name(layer.ffn_gate_a, tni(LLM_TENSOR_FFN_GATE, ".weight.lora_a", i));
+        ggml_set_name(layer.ffn_gate_b, tni(LLM_TENSOR_FFN_GATE, ".weight.lora_b", i));
+        ggml_set_name(layer.ffn_down_a, tni(LLM_TENSOR_FFN_DOWN, ".weight.lora_a", i));
+        ggml_set_name(layer.ffn_down_b, tni(LLM_TENSOR_FFN_DOWN, ".weight.lora_b", i));
+        ggml_set_name(layer.ffn_up_a, tni(LLM_TENSOR_FFN_UP, ".weight.lora_a", i));
+        ggml_set_name(layer.ffn_up_b, tni(LLM_TENSOR_FFN_UP, ".weight.lora_b", i));
     }

     set_param_lora(lora);
@@ -497,12 +497,12 @@ static void randomize_lora(struct my_llama_lora * lora, int seed, float mean, fl
         randomize_tensor_normal(layer.ffn_norm_a, rnd);
         ggml_set_zero(layer.ffn_norm_b);

-        randomize_tensor_normal(layer.w1_a, rnd);
-        ggml_set_zero(layer.w1_b);
-        randomize_tensor_normal(layer.w2_a, rnd);
-        ggml_set_zero(layer.w2_b);
-        randomize_tensor_normal(layer.w3_a, rnd);
-        ggml_set_zero(layer.w3_b);
+        randomize_tensor_normal(layer.ffn_gate_a, rnd);
+        ggml_set_zero(layer.ffn_gate_b);
+        randomize_tensor_normal(layer.ffn_down_a, rnd);
+        ggml_set_zero(layer.ffn_down_b);
+        randomize_tensor_normal(layer.ffn_up_a, rnd);
+        ggml_set_zero(layer.ffn_up_b);
     }

     free_random_normal_distribution(rnd);
@@ -610,13 +610,13 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(

         struct ggml_tensor * attention_norm = add_to_f32(ctx, layer.attention_norm, ggml_mul_mat(ctx, llayer.attention_norm_a, llayer.attention_norm_b));
         struct ggml_tensor * ffn_norm = add_to_f32(ctx, layer.ffn_norm, ggml_mul_mat(ctx, llayer.ffn_norm_a, llayer.ffn_norm_b));
         struct ggml_tensor * wq = add_to_f32(ctx, layer.wq, ggml_mul_mat(ctx, llayer.wq_a, llayer.wq_b));
         struct ggml_tensor * wk = add_to_f32(ctx, layer.wk, ggml_mul_mat(ctx, llayer.wk_a, llayer.wk_b));
         struct ggml_tensor * wv = add_to_f32(ctx, layer.wv, ggml_mul_mat(ctx, llayer.wv_a, llayer.wv_b));
         struct ggml_tensor * wo = add_to_f32(ctx, layer.wo, ggml_mul_mat(ctx, llayer.wo_a, llayer.wo_b));
-        struct ggml_tensor * w1 = add_to_f32(ctx, layer.w1, ggml_mul_mat(ctx, llayer.w1_a, llayer.w1_b));
-        struct ggml_tensor * w2 = add_to_f32(ctx, layer.w2, ggml_mul_mat(ctx, llayer.w2_a, llayer.w2_b));
-        struct ggml_tensor * w3 = add_to_f32(ctx, layer.w3, ggml_mul_mat(ctx, llayer.w3_a, llayer.w3_b));
+        struct ggml_tensor * ffn_gate = add_to_f32(ctx, layer.ffn_gate, ggml_mul_mat(ctx, llayer.ffn_gate_a, llayer.ffn_gate_b));
+        struct ggml_tensor * ffn_down = add_to_f32(ctx, layer.ffn_down, ggml_mul_mat(ctx, llayer.ffn_down_a, llayer.ffn_down_b));
+        struct ggml_tensor * ffn_up = add_to_f32(ctx, layer.ffn_up, ggml_mul_mat(ctx, llayer.ffn_up_a, llayer.ffn_up_b));

         struct ggml_tensor * t02 = ggml_rms_norm (ctx, cur, rms_norm_eps); set_name(t02, "t02"); assert_shape_2d(t02, n_embd, N*n_batch);
         struct ggml_tensor * t03 = ggml_repeat (ctx, attention_norm, t02); set_name(t03, "t03"); assert_shape_2d(t03, n_embd, N*n_batch);
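The hunk above is the core LoRA pattern the rename threads through: each effective weight used in the forward graph is the frozen base tensor plus the low-rank product of its _a/_b factors. A minimal sketch, assuming the add_to_f32 helper used above (defined elsewhere in finetune.cpp) and the factor shapes from the init_lora hunk:

```cpp
// Sketch, not part of the commit: W_eff = W_base + A*B for one tensor.
// For ffn_gate, lora_a is llayer.ffn_gate_a with shape [n_rank_ffn_gate, n_embd]
// and lora_b is llayer.ffn_gate_b with shape [n_rank_ffn_gate, n_ff];
// ggml_mul_mat contracts over dimension 0, so the delta comes out as
// [n_embd, n_ff], the same shape as the base layer.ffn_gate.
static struct ggml_tensor * lora_weight(
        struct ggml_context * ctx,
        struct ggml_tensor  * base,
        struct ggml_tensor  * lora_a,
        struct ggml_tensor  * lora_b) {
    return add_to_f32(ctx, base, ggml_mul_mat(ctx, lora_a, lora_b));
}
```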
@@ -659,11 +659,11 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
         struct ggml_tensor * t22 = ggml_rms_norm (ctx, t21, rms_norm_eps); set_name(t22, "t22"); assert_shape_2d(t22, n_embd, N*n_batch);
         struct ggml_tensor * t23 = ggml_repeat (ctx, ffn_norm, t22); set_name(t23, "t23"); assert_shape_2d(t23, n_embd, N*n_batch);
         struct ggml_tensor * t24 = ggml_mul (ctx, t23, t22); set_name(t24, "t24"); assert_shape_2d(t24, n_embd, N*n_batch);
-        struct ggml_tensor * t25 = ggml_mul_mat (ctx, w3, t24); set_name(t25, "t25"); assert_shape_2d(t25, n_ff, N*n_batch);
-        struct ggml_tensor * t26 = ggml_mul_mat (ctx, w1, t24); set_name(t26, "t26"); assert_shape_2d(t26, n_ff, N*n_batch);
+        struct ggml_tensor * t25 = ggml_mul_mat (ctx, ffn_up, t24); set_name(t25, "t25"); assert_shape_2d(t25, n_ff, N*n_batch);
+        struct ggml_tensor * t26 = ggml_mul_mat (ctx, ffn_gate, t24); set_name(t26, "t26"); assert_shape_2d(t26, n_ff, N*n_batch);
         struct ggml_tensor * t27 = ggml_silu (ctx, t26); set_name(t27, "t27"); assert_shape_2d(t27, n_ff, N*n_batch);
         struct ggml_tensor * t28 = ggml_mul (ctx, t27, t25); set_name(t28, "t28"); assert_shape_2d(t28, n_ff, N*n_batch);
-        struct ggml_tensor * t29 = ggml_mul_mat (ctx, w2, t28); set_name(t29, "t29"); assert_shape_2d(t29, n_embd, N*n_batch);
+        struct ggml_tensor * t29 = ggml_mul_mat (ctx, ffn_down, t28); set_name(t29, "t29"); assert_shape_2d(t29, n_embd, N*n_batch);
         struct ggml_tensor * t30 = ggml_add (ctx, t29, t21); set_name(t30, "t30"); assert_shape_2d(t30, n_embd, N*n_batch);
         cur = t30;
         if (enable_checkpointing) {
@@ -723,9 +723,9 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wk, 1.0f));
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wv, 1.0f));
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.wo, 1.0f));
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w1, 1.0f));
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w2, 1.0f));
-        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.w3, 1.0f));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.ffn_gate, 1.0f));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.ffn_down, 1.0f));
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, layer.ffn_up, 1.0f));
     }

     // allocating checkpoints in one block to reduce memory fragmentation
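A note on the hunk above: the ggml_scale_inplace(..., 1.0f) nodes are numerical no-ops. They appear to be there to insert extra graph dependencies on the frozen base weights so the graph allocator does not reuse their buffers; the base tensors are not trainable parameters here, only the LoRA _a/_b factors are (see set_param_lora above).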
@@ -798,9 +798,9 @@ static void load_llama_lora_gguf(struct gguf_context * fctx, struct ggml_context
     GGUF_GET_KEY(fctx, lora->hparams.n_rank_wv, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_ATTN_V);
     GGUF_GET_KEY(fctx, lora->hparams.n_rank_wo, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_ATTN_OUT);
     GGUF_GET_KEY(fctx, lora->hparams.n_rank_ffn_norm, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_NORM);
-    GGUF_GET_KEY(fctx, lora->hparams.n_rank_w1, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_GATE);
-    GGUF_GET_KEY(fctx, lora->hparams.n_rank_w2, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_DOWN);
-    GGUF_GET_KEY(fctx, lora->hparams.n_rank_w3, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_UP);
+    GGUF_GET_KEY(fctx, lora->hparams.n_rank_ffn_gate, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_GATE);
+    GGUF_GET_KEY(fctx, lora->hparams.n_rank_ffn_down, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_DOWN);
+    GGUF_GET_KEY(fctx, lora->hparams.n_rank_ffn_up, gguf_get_val_u32, GGUF_TYPE_UINT32, true, LLM_KV_TRAINING_LORA_RANK_FFN_UP);

     init_lora(model, lora);

@@ -825,12 +825,12 @@ static void load_llama_lora_gguf(struct gguf_context * fctx, struct ggml_context
         copy_tensor_by_name(layer.wo_b, f_ggml_ctx, ggml_get_name(layer.wo_b));
         copy_tensor_by_name(layer.ffn_norm_a, f_ggml_ctx, ggml_get_name(layer.ffn_norm_a));
         copy_tensor_by_name(layer.ffn_norm_b, f_ggml_ctx, ggml_get_name(layer.ffn_norm_b));
-        copy_tensor_by_name(layer.w1_a, f_ggml_ctx, ggml_get_name(layer.w1_a));
-        copy_tensor_by_name(layer.w1_b, f_ggml_ctx, ggml_get_name(layer.w1_b));
-        copy_tensor_by_name(layer.w2_a, f_ggml_ctx, ggml_get_name(layer.w2_a));
-        copy_tensor_by_name(layer.w2_b, f_ggml_ctx, ggml_get_name(layer.w2_b));
-        copy_tensor_by_name(layer.w3_a, f_ggml_ctx, ggml_get_name(layer.w3_a));
-        copy_tensor_by_name(layer.w3_b, f_ggml_ctx, ggml_get_name(layer.w3_b));
+        copy_tensor_by_name(layer.ffn_gate_a, f_ggml_ctx, ggml_get_name(layer.ffn_gate_a));
+        copy_tensor_by_name(layer.ffn_gate_b, f_ggml_ctx, ggml_get_name(layer.ffn_gate_b));
+        copy_tensor_by_name(layer.ffn_down_a, f_ggml_ctx, ggml_get_name(layer.ffn_down_a));
+        copy_tensor_by_name(layer.ffn_down_b, f_ggml_ctx, ggml_get_name(layer.ffn_down_b));
+        copy_tensor_by_name(layer.ffn_up_a, f_ggml_ctx, ggml_get_name(layer.ffn_up_a));
+        copy_tensor_by_name(layer.ffn_up_b, f_ggml_ctx, ggml_get_name(layer.ffn_up_b));
     }
 }

@@ -868,9 +868,9 @@ static void save_llama_lora_gguf(struct gguf_context * fctx, struct my_llama_mod
     gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_ATTN_V, lora->hparams.n_rank_wv);
     gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_ATTN_OUT, lora->hparams.n_rank_wo);
     gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_NORM, lora->hparams.n_rank_ffn_norm);
-    gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_GATE, lora->hparams.n_rank_w1);
-    gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_DOWN, lora->hparams.n_rank_w2);
-    gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_UP, lora->hparams.n_rank_w3);
+    gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_GATE, lora->hparams.n_rank_ffn_gate);
+    gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_DOWN, lora->hparams.n_rank_ffn_down);
+    gguf_set_val_u32(fctx, LLM_KV_TRAINING_LORA_RANK_FFN_UP, lora->hparams.n_rank_ffn_up);

     gguf_add_tensor(fctx, lora->tok_embeddings_a);
     gguf_add_tensor(fctx, lora->tok_embeddings_b);
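Note that both sides of this hunk write the same GGUF keys (LLM_KV_TRAINING_LORA_RANK_FFN_GATE and friends), and the tensor names saved elsewhere go through the unchanged LLM_TENSOR_FFN_GATE/FFN_DOWN/FFN_UP constants, so the rename is source-level only; checkpoint and LoRA files produced before and after this commit stay compatible.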
@@ -894,12 +894,12 @@ static void save_llama_lora_gguf(struct gguf_context * fctx, struct my_llama_mod
         gguf_add_tensor(fctx, layer.wo_b);
         gguf_add_tensor(fctx, layer.ffn_norm_a);
         gguf_add_tensor(fctx, layer.ffn_norm_b);
-        gguf_add_tensor(fctx, layer.w1_a);
-        gguf_add_tensor(fctx, layer.w1_b);
-        gguf_add_tensor(fctx, layer.w2_a);
-        gguf_add_tensor(fctx, layer.w2_b);
-        gguf_add_tensor(fctx, layer.w3_a);
-        gguf_add_tensor(fctx, layer.w3_b);
+        gguf_add_tensor(fctx, layer.ffn_gate_a);
+        gguf_add_tensor(fctx, layer.ffn_gate_b);
+        gguf_add_tensor(fctx, layer.ffn_down_a);
+        gguf_add_tensor(fctx, layer.ffn_down_b);
+        gguf_add_tensor(fctx, layer.ffn_up_a);
+        gguf_add_tensor(fctx, layer.ffn_up_b);
     }
 }

@@ -1104,12 +1104,12 @@ static void save_as_llama_lora(const char * filename, struct my_llama_lora * lor
         write_tensor(&file, layer.wo_b, tni(LLM_TENSOR_ATTN_OUT, i, ".weight.loraB"));
         write_tensor(&file, layer.ffn_norm_a, tni(LLM_TENSOR_FFN_NORM, i, ".weight.loraA"));
         write_tensor(&file, layer.ffn_norm_b, tni(LLM_TENSOR_FFN_NORM, i, ".weight.loraB"));
-        write_tensor(&file, layer.w1_a, tni(LLM_TENSOR_FFN_GATE, i, ".weight.loraA"));
-        write_tensor(&file, layer.w1_b, tni(LLM_TENSOR_FFN_GATE, i, ".weight.loraB"));
-        write_tensor(&file, layer.w2_a, tni(LLM_TENSOR_FFN_DOWN, i, ".weight.loraA"));
-        write_tensor(&file, layer.w2_b, tni(LLM_TENSOR_FFN_DOWN, i, ".weight.loraB"));
-        write_tensor(&file, layer.w3_a, tni(LLM_TENSOR_FFN_UP, i, ".weight.loraA"));
-        write_tensor(&file, layer.w3_b, tni(LLM_TENSOR_FFN_UP, i, ".weight.loraB"));
+        write_tensor(&file, layer.ffn_gate_a, tni(LLM_TENSOR_FFN_GATE, i, ".weight.loraA"));
+        write_tensor(&file, layer.ffn_gate_b, tni(LLM_TENSOR_FFN_GATE, i, ".weight.loraB"));
+        write_tensor(&file, layer.ffn_down_a, tni(LLM_TENSOR_FFN_DOWN, i, ".weight.loraA"));
+        write_tensor(&file, layer.ffn_down_b, tni(LLM_TENSOR_FFN_DOWN, i, ".weight.loraB"));
+        write_tensor(&file, layer.ffn_up_a, tni(LLM_TENSOR_FFN_UP, i, ".weight.loraA"));
+        write_tensor(&file, layer.ffn_up_b, tni(LLM_TENSOR_FFN_UP, i, ".weight.loraB"));
     }
 }

@@ -1139,9 +1139,9 @@ struct train_params {
     uint32_t n_rank_wv;
     uint32_t n_rank_wo;
     uint32_t n_rank_ffn_norm;
-    uint32_t n_rank_w1;
-    uint32_t n_rank_w2;
-    uint32_t n_rank_w3;
+    uint32_t n_rank_ffn_gate;
+    uint32_t n_rank_ffn_down;
+    uint32_t n_rank_ffn_up;
     uint32_t n_rank_tok_embeddings;
     uint32_t n_rank_norm;
     uint32_t n_rank_output;
@@ -1152,9 +1152,9 @@ struct train_params {
     bool custom_n_rank_wv;
     bool custom_n_rank_wo;
     bool custom_n_rank_ffn_norm;
-    bool custom_n_rank_w1;
-    bool custom_n_rank_w2;
-    bool custom_n_rank_w3;
+    bool custom_n_rank_ffn_gate;
+    bool custom_n_rank_ffn_down;
+    bool custom_n_rank_ffn_up;
     bool custom_n_rank_tok_embeddings;
     bool custom_n_rank_norm;
     bool custom_n_rank_output;
@@ -1186,9 +1186,9 @@ static struct train_params get_default_train_params() {
     params.n_rank_wv = 4;
     params.n_rank_wo = 4;
     params.n_rank_ffn_norm = 1;
-    params.n_rank_w1 = 4;
-    params.n_rank_w2 = 4;
-    params.n_rank_w3 = 4;
+    params.n_rank_ffn_gate = 4;
+    params.n_rank_ffn_down = 4;
+    params.n_rank_ffn_up = 4;
     params.n_rank_tok_embeddings = 4;
     params.n_rank_norm = 1;
     params.n_rank_output = 4;
@@ -1199,9 +1199,9 @@ static struct train_params get_default_train_params() {
     params.custom_n_rank_wv = false;
     params.custom_n_rank_wo = false;
     params.custom_n_rank_ffn_norm = false;
-    params.custom_n_rank_w1 = false;
-    params.custom_n_rank_w2 = false;
-    params.custom_n_rank_w3 = false;
+    params.custom_n_rank_ffn_gate = false;
+    params.custom_n_rank_ffn_down = false;
+    params.custom_n_rank_ffn_up = false;
     params.custom_n_rank_tok_embeddings = false;
     params.custom_n_rank_norm = false;
     params.custom_n_rank_output = false;
@@ -1232,9 +1232,9 @@ static void train_print_usage(int argc, char ** argv, const struct train_params
     fprintf(stderr, " --rank-wk N LORA rank for wk tensor, overrides default rank.\n");
     fprintf(stderr, " --rank-wv N LORA rank for wv tensor, overrides default rank.\n");
     fprintf(stderr, " --rank-wo N LORA rank for wo tensor, overrides default rank.\n");
-    fprintf(stderr, " --rank-w1 N LORA rank for w1 tensor, overrides default rank.\n");
-    fprintf(stderr, " --rank-w2 N LORA rank for w2 tensor, overrides default rank.\n");
-    fprintf(stderr, " --rank-w3 N LORA rank for w3 tensor, overrides default rank.\n");
+    fprintf(stderr, " --rank-ffn_gate N LORA rank for ffn_gate tensor, overrides default rank.\n");
+    fprintf(stderr, " --rank-ffn_down N LORA rank for ffn_down tensor, overrides default rank.\n");
+    fprintf(stderr, " --rank-ffn_up N LORA rank for ffn_up tensor, overrides default rank.\n");

     print_common_train_usage(argc, argv, &params->common);
 }
@@ -1369,27 +1369,27 @@ static bool train_params_parse(int argc, char ** argv, struct train_params * par
             }
             params->n_rank_wo = std::stoi(argv[i]);
             params->custom_n_rank_wo = true;
-        } else if (arg == "--rank-w1") {
+        } else if (arg == "--rank-ffn_gate") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            params->n_rank_w1 = std::stoi(argv[i]);
-            params->custom_n_rank_w1 = true;
-        } else if (arg == "--rank-w2") {
+            params->n_rank_ffn_gate = std::stoi(argv[i]);
+            params->custom_n_rank_ffn_gate = true;
+        } else if (arg == "--rank-ffn_down") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            params->n_rank_w2 = std::stoi(argv[i]);
-            params->custom_n_rank_w2 = true;
-        } else if (arg == "--rank-w3") {
+            params->n_rank_ffn_down = std::stoi(argv[i]);
+            params->custom_n_rank_ffn_down = true;
+        } else if (arg == "--rank-ffn_up") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            params->n_rank_w3 = std::stoi(argv[i]);
-            params->custom_n_rank_w3 = true;
+            params->n_rank_ffn_up = std::stoi(argv[i]);
+            params->custom_n_rank_ffn_up = true;
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             train_print_usage(argc, argv, &default_params);
@@ -1452,12 +1452,12 @@ static int64_t get_parameter_count(struct my_llama_lora* lora) {
         nx += ggml_nelements(layer.wo_b);
         nx += ggml_nelements(layer.ffn_norm_a);
         nx += ggml_nelements(layer.ffn_norm_b);
-        nx += ggml_nelements(layer.w1_a);
-        nx += ggml_nelements(layer.w1_b);
-        nx += ggml_nelements(layer.w2_a);
-        nx += ggml_nelements(layer.w2_b);
-        nx += ggml_nelements(layer.w3_a);
-        nx += ggml_nelements(layer.w3_b);
+        nx += ggml_nelements(layer.ffn_gate_a);
+        nx += ggml_nelements(layer.ffn_gate_b);
+        nx += ggml_nelements(layer.ffn_down_a);
+        nx += ggml_nelements(layer.ffn_down_b);
+        nx += ggml_nelements(layer.ffn_up_a);
+        nx += ggml_nelements(layer.ffn_up_b);
     }
     return nx;
 }
@@ -1511,9 +1511,9 @@ int main(int argc, char ** argv) {
     uint32_t n_rank_wv = params.custom_n_rank_wv ? params.n_rank_wv : params.lora_r;
     uint32_t n_rank_wo = params.custom_n_rank_wo ? params.n_rank_wo : params.lora_r;
     uint32_t n_rank_ffn_norm = params.custom_n_rank_ffn_norm ? params.n_rank_ffn_norm : 1;
-    uint32_t n_rank_w1 = params.custom_n_rank_w1 ? params.n_rank_w1 : params.lora_r;
-    uint32_t n_rank_w2 = params.custom_n_rank_w2 ? params.n_rank_w2 : params.lora_r;
-    uint32_t n_rank_w3 = params.custom_n_rank_w3 ? params.n_rank_w3 : params.lora_r;
+    uint32_t n_rank_ffn_gate = params.custom_n_rank_ffn_gate ? params.n_rank_ffn_gate : params.lora_r;
+    uint32_t n_rank_ffn_down = params.custom_n_rank_ffn_down ? params.n_rank_ffn_down : params.lora_r;
+    uint32_t n_rank_ffn_up = params.custom_n_rank_ffn_up ? params.n_rank_ffn_up : params.lora_r;
     uint32_t n_rank_tok_embeddings = params.custom_n_rank_tok_embeddings ? params.n_rank_tok_embeddings : params.lora_r;
     uint32_t n_rank_norm = params.custom_n_rank_norm ? params.n_rank_norm : 1;
     uint32_t n_rank_output = params.custom_n_rank_output ? params.n_rank_output : params.lora_r;
@@ -1523,9 +1523,9 @@ int main(int argc, char ** argv) {
     lora.hparams.n_rank_wv = n_rank_wv;
     lora.hparams.n_rank_wo = n_rank_wo;
     lora.hparams.n_rank_ffn_norm = n_rank_ffn_norm;
-    lora.hparams.n_rank_w1 = n_rank_w1;
-    lora.hparams.n_rank_w2 = n_rank_w2;
-    lora.hparams.n_rank_w3 = n_rank_w3;
+    lora.hparams.n_rank_ffn_gate = n_rank_ffn_gate;
+    lora.hparams.n_rank_ffn_down = n_rank_ffn_down;
+    lora.hparams.n_rank_ffn_up = n_rank_ffn_up;
     lora.hparams.n_rank_tok_embeddings = n_rank_tok_embeddings;
     lora.hparams.n_rank_norm = n_rank_norm;
     lora.hparams.n_rank_output = n_rank_output;
@@ -1566,9 +1566,9 @@ int main(int argc, char ** argv) {
         || (lora.hparams.n_rank_wv != n_rank_wv)
         || (lora.hparams.n_rank_wo != n_rank_wo)
         || (lora.hparams.n_rank_ffn_norm != n_rank_ffn_norm)
-        || (lora.hparams.n_rank_w1 != n_rank_w1)
-        || (lora.hparams.n_rank_w2 != n_rank_w2)
-        || (lora.hparams.n_rank_w3 != n_rank_w3)
+        || (lora.hparams.n_rank_ffn_gate != n_rank_ffn_gate)
+        || (lora.hparams.n_rank_ffn_down != n_rank_ffn_down)
+        || (lora.hparams.n_rank_ffn_up != n_rank_ffn_up)
         || (lora.hparams.n_rank_tok_embeddings != n_rank_tok_embeddings)
         || (lora.hparams.n_rank_norm != n_rank_norm)
         || (lora.hparams.n_rank_output != n_rank_output)
examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -50,9 +50,9 @@ struct my_llama_layer {
     struct ggml_tensor * ffn_norm;

     // ff
-    struct ggml_tensor * w1;
-    struct ggml_tensor * w2;
-    struct ggml_tensor * w3;
+    struct ggml_tensor * ffn_gate; // w1
+    struct ggml_tensor * ffn_down; // w2
+    struct ggml_tensor * ffn_up;   // w3
 };

 struct my_llama_model {
@@ -140,9 +140,9 @@ static void set_param_model(struct my_llama_model * model) {
         ggml_set_param(ctx, layer.wv);
         ggml_set_param(ctx, layer.wo);
         ggml_set_param(ctx, layer.ffn_norm);
-        ggml_set_param(ctx, layer.w1);
-        ggml_set_param(ctx, layer.w2);
-        ggml_set_param(ctx, layer.w3);
+        ggml_set_param(ctx, layer.ffn_gate);
+        ggml_set_param(ctx, layer.ffn_down);
+        ggml_set_param(ctx, layer.ffn_up);
     }
 }

@@ -198,9 +198,9 @@ static void init_model(struct my_llama_model * model) {

         layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

-        layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
-        layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
-        layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
+        layer.ffn_gate = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
+        layer.ffn_down = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
+        layer.ffn_up = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);

         ggml_set_name(layer.attention_norm, tni(LLM_TENSOR_ATTN_NORM, i));

@@ -211,9 +211,9 @@ static void init_model(struct my_llama_model * model) {

         ggml_set_name(layer.ffn_norm, tni(LLM_TENSOR_FFN_NORM, i));

-        ggml_set_name(layer.w1, tni(LLM_TENSOR_FFN_GATE, i));
-        ggml_set_name(layer.w2, tni(LLM_TENSOR_FFN_DOWN, i));
-        ggml_set_name(layer.w3, tni(LLM_TENSOR_FFN_UP, i));
+        ggml_set_name(layer.ffn_gate, tni(LLM_TENSOR_FFN_GATE, i));
+        ggml_set_name(layer.ffn_down, tni(LLM_TENSOR_FFN_DOWN, i));
+        ggml_set_name(layer.ffn_up, tni(LLM_TENSOR_FFN_UP, i));
     }

     set_param_model(model);
@@ -244,9 +244,9 @@ static void randomize_model(struct my_llama_model * model, int seed, float mean,

         randomize_tensor_normal(layer.ffn_norm, rnd);

-        randomize_tensor_normal(layer.w1, rnd);
-        randomize_tensor_normal(layer.w2, rnd);
-        randomize_tensor_normal(layer.w3, rnd);
+        randomize_tensor_normal(layer.ffn_gate, rnd);
+        randomize_tensor_normal(layer.ffn_down, rnd);
+        randomize_tensor_normal(layer.ffn_up, rnd);
     }

     free_random_normal_distribution(rnd);
@@ -356,11 +356,11 @@ static struct ggml_tensor * llama_build_train_graphs(
         struct ggml_tensor * t22 = ggml_rms_norm (ctx, t21, f_norm_rms_eps); set_name(t22, "t22"); assert_shape_2d(t22, n_embd, N*n_batch);
         struct ggml_tensor * t23 = ggml_repeat (ctx, layer.ffn_norm, t22); set_name(t23, "t23"); assert_shape_2d(t23, n_embd, N*n_batch);
         struct ggml_tensor * t24 = ggml_mul (ctx, t23, t22); set_name(t24, "t24"); assert_shape_2d(t24, n_embd, N*n_batch);
-        struct ggml_tensor * t25 = ggml_mul_mat (ctx, layer.w3, t24); set_name(t25, "t25"); assert_shape_2d(t25, n_ff, N*n_batch);
-        struct ggml_tensor * t26 = ggml_mul_mat (ctx, layer.w1, t24); set_name(t26, "t26"); assert_shape_2d(t26, n_ff, N*n_batch);
+        struct ggml_tensor * t25 = ggml_mul_mat (ctx, layer.ffn_up, t24); set_name(t25, "t25"); assert_shape_2d(t25, n_ff, N*n_batch);
+        struct ggml_tensor * t26 = ggml_mul_mat (ctx, layer.ffn_gate, t24); set_name(t26, "t26"); assert_shape_2d(t26, n_ff, N*n_batch);
         struct ggml_tensor * t27 = ggml_silu (ctx, t26); set_name(t27, "t27"); assert_shape_2d(t27, n_ff, N*n_batch);
         struct ggml_tensor * t28 = ggml_mul (ctx, t27, t25); set_name(t28, "t28"); assert_shape_2d(t28, n_ff, N*n_batch);
-        struct ggml_tensor * t29 = ggml_mul_mat (ctx, layer.w2, t28); set_name(t29, "t29"); assert_shape_2d(t29, n_embd, N*n_batch);
+        struct ggml_tensor * t29 = ggml_mul_mat (ctx, layer.ffn_down, t28); set_name(t29, "t29"); assert_shape_2d(t29, n_embd, N*n_batch);
         struct ggml_tensor * t30 = ggml_add (ctx, t29, t21); set_name(t30, "t30"); assert_shape_2d(t30, n_embd, N*n_batch);
         cur = t30;
         checkpoints.push_back(cur);
@@ -521,9 +521,9 @@ static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_contex
         copy_tensor_by_name(layer.wv, f_ggml_ctx, tni(LLM_TENSOR_ATTN_V, i));
         copy_tensor_by_name(layer.wo, f_ggml_ctx, tni(LLM_TENSOR_ATTN_OUT, i));
         copy_tensor_by_name(layer.ffn_norm, f_ggml_ctx, tni(LLM_TENSOR_FFN_NORM, i));
-        copy_tensor_by_name(layer.w1, f_ggml_ctx, tni(LLM_TENSOR_FFN_GATE, i));
-        copy_tensor_by_name(layer.w2, f_ggml_ctx, tni(LLM_TENSOR_FFN_DOWN, i));
-        copy_tensor_by_name(layer.w3, f_ggml_ctx, tni(LLM_TENSOR_FFN_UP, i));
+        copy_tensor_by_name(layer.ffn_gate, f_ggml_ctx, tni(LLM_TENSOR_FFN_GATE, i));
+        copy_tensor_by_name(layer.ffn_down, f_ggml_ctx, tni(LLM_TENSOR_FFN_DOWN, i));
+        copy_tensor_by_name(layer.ffn_up, f_ggml_ctx, tni(LLM_TENSOR_FFN_UP, i));
     }
 }

@@ -664,9 +664,9 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
         gguf_add_tensor(fctx, layer.wv);
         gguf_add_tensor(fctx, layer.wo);
         gguf_add_tensor(fctx, layer.ffn_norm);
-        gguf_add_tensor(fctx, layer.w1);
-        gguf_add_tensor(fctx, layer.w2);
-        gguf_add_tensor(fctx, layer.w3);
+        gguf_add_tensor(fctx, layer.ffn_gate);
+        gguf_add_tensor(fctx, layer.ffn_down);
+        gguf_add_tensor(fctx, layer.ffn_up);
     }
 }

@@ -915,9 +915,9 @@ static int64_t get_parameter_count(struct my_llama_model* model) {
         nx += ggml_nelements(layer.wv);
         nx += ggml_nelements(layer.wo);
         nx += ggml_nelements(layer.ffn_norm);
-        nx += ggml_nelements(layer.w1);
-        nx += ggml_nelements(layer.w2);
-        nx += ggml_nelements(layer.w3);
+        nx += ggml_nelements(layer.ffn_gate);
+        nx += ggml_nelements(layer.ffn_down);
+        nx += ggml_nelements(layer.ffn_up);
     }
     return nx;
 }