mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 22:59:24 +01:00
CUDA: revert part of the RDNA1 optimizations (#8309)
The change to the `__launch_bounds__` for RDNA1 was causing a small performance drop of 25 t/s in perplexity runs.
This commit is contained in:
parent
d12f781074
commit
0a423800ff
@@ -2263,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
|
|||||||
|
|
||||||
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
|
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
|
||||||
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
||||||
#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
|
#if defined(RDNA3) || defined(RDNA2)
|
||||||
__launch_bounds__(WARP_SIZE*nwarps, 2)
|
__launch_bounds__(WARP_SIZE*nwarps, 2)
|
||||||
#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
|
#endif // defined(RDNA3) || defined(RDNA2)
|
||||||
#else
|
#else
|
||||||
#if __CUDA_ARCH__ >= CC_VOLTA
|
#if __CUDA_ARCH__ >= CC_VOLTA
|
||||||
__launch_bounds__(WARP_SIZE*nwarps, 1)
|
__launch_bounds__(WARP_SIZE*nwarps, 1)
|
||||||
|
Loading…
Reference in New Issue
Block a user