CUDA: revert part of the RDNA1 optimizations (#8309)

The change to `__launch_bounds__` for RDNA1 caused a small performance drop of 25 t/s when running perplexity.
This commit is contained in:
Daniele 2024-07-05 07:06:09 +00:00 committed by GitHub
parent d12f781074
commit 0a423800ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2263,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
#if defined(RDNA3) || defined(RDNA2)
__launch_bounds__(WARP_SIZE*nwarps, 2)
#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
#endif // defined(RDNA3) || defined(RDNA2)
#else
#if __CUDA_ARCH__ >= CC_VOLTA
__launch_bounds__(WARP_SIZE*nwarps, 1)