metal : simplify soft_max encoding

ggml-ci
2025-01-30 13:53:03 +01:00 · 2023-11-29 17:30:19 +02:00 · 2023-11-29 17:30:19 +02:00 · 580fe2064c
commit 580fe2064c
parent 390a445906
2 changed files with 2 additions and 7 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1040,12 +1040,7 @@ void ggml_metal_graph_compute(
                            const float scale = ((float *) dst->op_params)[0];

                            [encoder setBuffer:id_src0 offset:offs_src0   atIndex:0];
-                            if (id_src1) {
-                                [encoder setBuffer:id_src1 offset:offs_src1   atIndex:1];
-                            } else {
-                                [encoder setBuffer:nil     offset:0           atIndex:1];
-                            }
-
+                            [encoder setBuffer:id_src1 offset:offs_src1   atIndex:1];
                            [encoder setBuffer:id_dst  offset:offs_dst    atIndex:2];
                            [encoder setBytes:&ne00  length:sizeof(ne00)  atIndex:3];
                            [encoder setBytes:&ne01  length:sizeof(ne01)  atIndex:4];
--- a/llama.cpp
+++ b/llama.cpp
@@ -3705,8 +3705,8 @@ static struct ggml_tensor * llm_build_kqv(
    struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
    cb(kq, "kq", il);

-    // TODO: !!!!!!!!!
    if (max_alibi_bias > 0.0f) {
+        // temporary branch until we figure out how to handle ggml_alibi through ggml_add
        kq = ggml_scale(ctx, kq, kq_scale);
        cb(kq, "kq_scaled", il);