ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (#4787)

This commit is contained in:
Konstantin Zhuravlyov 2024-01-07 01:52:42 -05:00 committed by GitHub
parent 67984921a7
commit 63ee677efd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) { static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
c = __builtin_amdgcn_sdot4(a, b, c, false); c = __builtin_amdgcn_sdot4(a, b, c, false);
#elif defined(__gfx1100__) #elif defined(RDNA3)
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false); c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
#elif defined(__gfx1010__) || defined(__gfx900__) #elif defined(__gfx1010__) || defined(__gfx900__)
int tmp1; int tmp1;