mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-27 04:23:06 +01:00
export-lora : fix tok_embd tensor
This commit is contained in:
parent
80d0d6b4b7
commit
510b626c03
@@ -345,8 +345,18 @@ struct lora_merge_ctx {
|
||||
gf = ggml_new_graph(ctx0);
|
||||
struct ggml_tensor * cur = inp_base;
|
||||
for (size_t i = 0; i < adapters.size(); ++i) {
|
||||
struct ggml_tensor * a_T = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32)));
|
||||
struct ggml_tensor * delta = ggml_mul_mat(ctx0, a_T, ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
|
||||
struct ggml_tensor * delta;
|
||||
bool is_tok_embd = string_starts_with(name_base, "token_embd");
|
||||
if (is_tok_embd) {
|
||||
printf("%s : detected token embeddings tensor\n", __func__);
|
||||
delta = ggml_mul_mat(ctx0,
|
||||
ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32),
|
||||
ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32));
|
||||
} else {
|
||||
delta = ggml_mul_mat(ctx0,
|
||||
ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32))),
|
||||
ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
|
||||
}
|
||||
// scale
|
||||
const float alpha = adapters[i]->alpha;
|
||||
const float rank = (float) inp_b[i]->ne[0];
|
||||
|
Loading…
Reference in New Issue
Block a user