mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (#4787)
This commit is contained in:
parent
67984921a7
commit
63ee677efd
@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
|
|||||||
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
|
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
|
||||||
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
|
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
|
||||||
c = __builtin_amdgcn_sdot4(a, b, c, false);
|
c = __builtin_amdgcn_sdot4(a, b, c, false);
|
||||||
#elif defined(__gfx1100__)
|
#elif defined(RDNA3)
|
||||||
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
|
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
|
||||||
#elif defined(__gfx1010__) || defined(__gfx900__)
|
#elif defined(__gfx1010__) || defined(__gfx900__)
|
||||||
int tmp1;
|
int tmp1;
|
||||||
|
Loading…
Reference in New Issue
Block a user