diff --git a/ggml-metal.m b/ggml-metal.m index 0b468bea0..58149a487 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -1040,12 +1040,7 @@ void ggml_metal_graph_compute( const float scale = ((float *) dst->op_params)[0]; [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - if (id_src1) { - [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1]; - } else { - [encoder setBuffer:nil offset:0 atIndex:1]; - } - + [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1]; [encoder setBuffer:id_dst offset:offs_dst atIndex:2]; [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3]; [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4]; diff --git a/llama.cpp b/llama.cpp index 2c13aeb50..7b261b73e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3705,8 +3705,8 @@ static struct ggml_tensor * llm_build_kqv( struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q); cb(kq, "kq", il); - // TODO: !!!!!!!!! if (max_alibi_bias > 0.0f) { + // temporary branch until we figure out how to handle ggml_alibi through ggml_add kq = ggml_scale(ctx, kq, kq_scale); cb(kq, "kq_scaled", il);