From 580fe2064cc439a588c56b791a2ecbe07d35bcba Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 29 Nov 2023 17:30:19 +0200
Subject: [PATCH] metal : simplify soft_max encoding

ggml-ci
---
 ggml-metal.m | 7 +------
 llama.cpp    | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index 0b468bea0..58149a487 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1040,12 +1040,7 @@ void ggml_metal_graph_compute(
                         const float scale = ((float *) dst->op_params)[0];
 
                         [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
-                        if (id_src1) {
-                            [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
-                        } else {
-                            [encoder setBuffer:nil offset:0 atIndex:1];
-                        }
-
+                        [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
                         [encoder setBuffer:id_dst offset:offs_dst atIndex:2];
                         [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
                         [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4];
diff --git a/llama.cpp b/llama.cpp
index 2c13aeb50..7b261b73e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3705,8 +3705,8 @@ static struct ggml_tensor * llm_build_kqv(
     struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
    cb(kq, "kq", il);
 
-    // TODO: !!!!!!!!!
     if (max_alibi_bias > 0.0f) {
+        // temporary branch until we figure out how to handle ggml_alibi through ggml_add
         kq = ggml_scale(ctx, kq, kq_scale);
         cb(kq, "kq_scaled", il);