From 6b86bcffac842f35e03e608ce62065a361ffac0c Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 30 Nov 2023 20:40:47 +0200
Subject: [PATCH] cuda : increase max block size to 1024

---
 ggml-cuda.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 080193cbd..98343d208 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -443,7 +443,7 @@ static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_
 #define CUDA_SCALE_BLOCK_SIZE 256
 #define CUDA_CLAMP_BLOCK_SIZE 256
 #define CUDA_ROPE_BLOCK_SIZE 256
-#define CUDA_SOFT_MAX_BLOCK_SIZE 512
+#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
 #define CUDA_ALIBI_BLOCK_SIZE 32
 #define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
 #define CUDA_QUANTIZE_BLOCK_SIZE 256