mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-29 07:34:18 +01:00
cuda : increase max block size to 1024
This commit is contained in:
parent
62532c05aa
commit
6b86bcffac
@@ -443,7 +443,7 @@ static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_
|
||||
#define CUDA_SCALE_BLOCK_SIZE 256
|
||||
#define CUDA_CLAMP_BLOCK_SIZE 256
|
||||
#define CUDA_ROPE_BLOCK_SIZE 256
|
||||
-#define CUDA_SOFT_MAX_BLOCK_SIZE 512
|
||||
+#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
|
||||
#define CUDA_ALIBI_BLOCK_SIZE 32
|
||||
#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
|
||||
#define CUDA_QUANTIZE_BLOCK_SIZE 256
|
||||
|
Loading…
Reference in New Issue
Block a user