AMD: parse the architecture as supplied by gcnArchName

The value provided by `minor` is truncated for AMD devices, so parse the value returned by gcnArchName instead to retrieve an accurate ID.

We can also use a single common value for GCN4 (gfx800) so that compatible gfx8 devices are not missed.
This commit is contained in:
Jon Haus 2025-01-13 13:22:49 -05:00
parent 3edfa7d375
commit f77ea24a1a
5 changed files with 796 additions and 12 deletions

View File

@ -46,20 +46,20 @@
#define GGML_CUDA_CC_VOLTA 700
#define GGML_CUDA_CC_TURING 750
#define GGML_CUDA_CC_AMPERE 800
// Every AMD architecture ID below is this offset plus a per-architecture number.
// NOTE(review): the hex-valued definitions appear to mirror the gfx target name
// (e.g. 0x906 next to "MI50/Radeon VII") - confirm against the code that computes `cc`.
#define GGML_CUDA_CC_OFFSET_AMD 1000000
#define GGML_CUDA_CC_OFFSET_AMD 0x1000000
// GCN/CDNA, wave size is 64
#define GGML_CUDA_CC_GCN4 (GGML_CUDA_CC_OFFSET_AMD + 803) // Tonga, Fiji, Polaris, minimum for fast fp16
#define GGML_CUDA_CC_VEGA (GGML_CUDA_CC_OFFSET_AMD + 900) // Vega56/64, minimum for fp16 dual issue
#define GGML_CUDA_CC_VEGA20 (GGML_CUDA_CC_OFFSET_AMD + 906) // MI50/Radeon VII, minimum for dp4a
#define GGML_CUDA_CC_CDNA (GGML_CUDA_CC_OFFSET_AMD + 908) // MI100, minimum for MFMA, acc registers
#define GGML_CUDA_CC_CDNA2 (GGML_CUDA_CC_OFFSET_AMD + 910) // MI210, minimum acc register renaming
#define GGML_CUDA_CC_CDNA3 (GGML_CUDA_CC_OFFSET_AMD + 942) // MI300
#define GGML_CUDA_CC_GCN4 (GGML_CUDA_CC_OFFSET_AMD + 0x800) // Tonga, Fiji, Polaris, minimum for fast fp16
#define GGML_CUDA_CC_VEGA (GGML_CUDA_CC_OFFSET_AMD + 0x900) // Vega56/64, minimum for fp16 dual issue
#define GGML_CUDA_CC_VEGA20 (GGML_CUDA_CC_OFFSET_AMD + 0x906) // MI50/Radeon VII, minimum for dp4a
#define GGML_CUDA_CC_CDNA (GGML_CUDA_CC_OFFSET_AMD + 0x908) // MI100, minimum for MFMA, acc registers
#define GGML_CUDA_CC_CDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x910) // MI210, minimum acc register renaming
#define GGML_CUDA_CC_CDNA3 (GGML_CUDA_CC_OFFSET_AMD + 0x942) // MI300
// RDNA removes MFMA, dp4a, xnack, acc registers, wave size is 32
#define GGML_CUDA_CC_RDNA1 (GGML_CUDA_CC_OFFSET_AMD + 1010) // RX 5000
#define GGML_CUDA_CC_RDNA2 (GGML_CUDA_CC_OFFSET_AMD + 1030) // RX 6000, minimum for dp4a
#define GGML_CUDA_CC_RDNA3 (GGML_CUDA_CC_OFFSET_AMD + 1100) // RX 7000, minimum for WMMA
#define GGML_CUDA_CC_RDNA1 (GGML_CUDA_CC_OFFSET_AMD + 0x1010) // RX 5000
#define GGML_CUDA_CC_RDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x1030) // RX 6000, minimum for dp4a
#define GGML_CUDA_CC_RDNA3 (GGML_CUDA_CC_OFFSET_AMD + 0x1100) // RX 7000, minimum for WMMA
#define GGML_CUDA_CC_QY1 210
#define GGML_CUDA_CC_QY2 220

View File

@ -119,6 +119,59 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
#endif
}
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
// Convert an AMD target ID string (as reported in gcnArchName) into a ggml
// architecture ID.
//
// Accepted forms, each optionally followed by ":feature" suffixes such as
// ":xnack-" or ":sramecc+":
//   gfx<major><minor><step>   e.g. "gfx906", "gfx90a", "gfx1030"
//   gfx<major>-generic        e.g. "gfx9-generic", "gfx11-generic"
//   gfx<major>-<minor>-generic e.g. "gfx9-4-generic", "gfx10-3-generic"
//
// The digits are read as hexadecimal, so "gfx1030" yields
// GGML_CUDA_CC_OFFSET_AMD + 0x1030. All gfx8 targets collapse to
// GGML_CUDA_CC_OFFSET_AMD + 0x800.
//
// devName is only read, never modified. If the name cannot be parsed (null,
// missing "gfx" prefix, too long, out-of-range fields) the bare
// GGML_CUDA_CC_OFFSET_AMD is returned so callers can detect the failure.
int ggml_cuda_parse_id(char devName[]) {
    // A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
    // these values are not stable so this is susceptible to breakage
    // https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
    static const char prefix[]        = "gfx";
    static const char genericSuffix[] = "-generic";
    const size_t prefixLen = sizeof(prefix) - 1;
    const size_t suffixLen = sizeof(genericSuffix) - 1;
    const int    invalid   = GGML_CUDA_CC_OFFSET_AMD;

    // Reject anything that is not a gfx target instead of blindly skipping 3 bytes.
    if (devName == nullptr || strncmp(devName, prefix, prefixLen) != 0) {
        return invalid;
    }

    // Everything from the first ':' on is a feature flag (:xnack-, :sramecc+, ...).
    const char * arch    = devName + prefixLen;
    const size_t archLen = strcspn(arch, ":");

    // Work on a bounded private copy: the caller's string stays untouched and
    // no variable-length array is needed.
    char archName[32];
    if (archLen >= sizeof(archName)) {
        return invalid;
    }
    memcpy(archName, arch, archLen);
    archName[archLen] = '\0';

    unsigned long archMajor = 0;
    unsigned long archMinor = 0;

    if (archLen > suffixLen) {
        // Generic targets use '-' as delimiter: "<major>[-<minor>]-generic".
        if (strcmp(archName + archLen - suffixLen, genericSuffix) == 0) {
            archName[archLen - suffixLen] = '\0';
            char * end = nullptr;
            archMajor = strtoul(archName, &end, 16);
            if (end != archName && *end == '-') {
                archMinor = 0x10 * strtoul(end + 1, nullptr, 16);
            }
        }
    } else if (archLen >= 3) {
        // The last two digits are minor * 0x10 + stepping ...
        archMinor = strtoul(&archName[archLen - 2], nullptr, 16);
        archName[archLen - 2] = '\0';
        // ... and only the major version remains in front of them.
        archMajor = strtoul(archName, nullptr, 16);
    }

    // Guard the arithmetic below against nonsensical input.
    if (archMajor > 0xff || archMinor > 0xff) {
        return invalid;
    }

    int archNum = invalid + (int) archMajor * 0x100;
    // be inclusive of the full gfx8 line for backward compatibility (Carrizo APUs, etc.)
    if (archMajor != 8) {
        archNum += (int) archMinor;
    }
    return archNum;
}
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
static ggml_cuda_device_info ggml_cuda_init() {
#ifdef __HIP_PLATFORM_AMD__
// Workaround for a rocBLAS bug when using multiple graphics cards:
@ -169,7 +222,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
cudaDeviceProp prop;
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
info.default_tensor_split[id] = total_vram;
total_vram += prop.totalGlobalMem;
@ -178,10 +230,29 @@ static ggml_cuda_device_info ggml_cuda_init() {
info.devices[id].smpb = prop.sharedMemPerBlock;
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
info.devices[id].smpbo = prop.sharedMemPerBlock;
info.devices[id].cc = 100*prop.major + 10*prop.minor + GGML_CUDA_CC_OFFSET_AMD;
info.devices[id].cc = ggml_cuda_parse_id(prop.gcnArchName);
if ((info.devices[id].cc & 0xff00) == 0x0) {
GGML_LOG_WARN("invalid architecture ID received for device %d %s: %s cc %d.%d\n",
id, prop.name, prop.gcnArchName, prop.major, prop.minor);
// Fallback to prop.major and prop.minor
if (prop.major > 0) {
info.devices[id].cc = GGML_CUDA_CC_OFFSET_AMD + prop.major * 0x100;
// be inclusive of the full gfx8 line for backward compatibility (Carrizu APUs, etc.)
if (prop.minor != 8) {
info.devices[id].cc += prop.minor * 0x10;
}
}
}
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n",
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no");
#else
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
info.devices[id].cc = 100*prop.major + 10*prop.minor;
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
}

107
scripts/fetch-amd-ids.py Executable file
View File

@ -0,0 +1,107 @@
#!/bin/env python3
import _io
import re
import os
import sys
from datetime import date
from pathlib import Path
from urllib import request
from urllib.request import urlopen
# Arguments matching this pattern are fetched as URLs; anything else is a file path.
reUrl = re.compile('^(http(s|)://)(www.|)[a-zA-Z0-9.]*/.*$')
# Line that opens the supportedIsas_ table in device.cpp.
reSupportedIsas = re.compile('.*static constexpr Isa supportedIsas_.*')
# One table row: fifteen comma-separated fields inside a pair of braces.
reTarget = re.compile('.*{([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*)},.*')
src = "https://raw.githubusercontent.com/ROCm/clr/refs/heads/amd-staging/rocclr/device/device.cpp"
srcType = 'url'
# Filled by parse() with (id, major, minor, step) string tuples.
targets = []


def parse(items):
    """Collect the rows of the supportedIsas_ table into the global ``targets``.

    ``items`` is a list (or tuple) of text lines. Scanning starts at the line
    matching ``reSupportedIsas`` (whose opening brace counts as depth 1) and
    stops at the brace that closes the table. For every row matching
    ``reTarget`` a tuple ``(id, major, minor, step)`` is appended, taken from
    fields 1, 5, 6 and 7 with surrounding spaces (and, for the id, quotes)
    removed. All values are kept as strings.
    """
    assert isinstance(items, (list, tuple))
    depth = 0
    for line in items:
        if reSupportedIsas.match(line):
            # The table's opening brace sits on this line.
            depth += 1
            continue
        if not depth:
            # Still in front of the table.
            continue
        for char in line:
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth < 1:
                    break
        if depth < 1:
            # Closing brace of the table reached; nothing further is of interest.
            break
        row = reTarget.match(line)
        if row:
            targets.append((
                row.group(1).strip(' "'),
                row.group(5).strip(' '),
                row.group(6).strip(' '),
                row.group(7).strip(' '),
            ))
if __name__ == '__main__':
    # An optional first argument replaces the default upstream URL. Anything
    # that does not look like an http(s) URL is treated as a local file path.
    if len(sys.argv) > 1:
        src = sys.argv[1]
        srcType = 'url' if reUrl.fullmatch(src) else 'file'

    if srcType == "url":
        # The context manager closes the HTTP response once it has been read.
        with urlopen(request.Request(src)) as response:
            text = response.read().decode("utf-8")
        parse(text.splitlines())
    else:
        try:
            # `with` closes the file on every path; no manual cleanup is needed.
            with open(src, 'r', encoding='utf-8') as fileIn:
                parse(fileIn.readlines())
        except OSError as error:
            # Report unreadable/missing input on stderr instead of hiding it.
            print(f'Unable to read {src}: {error}', file=sys.stderr)
            sys.exit(1)

    if len(targets) == 0:
        print(f'No items found in {src}!', file=sys.stderr)
        sys.exit(1)

    # Emit the C snippet consumed by tests/test-parse-amd-ids.c.
    print('struct target {')
    print(' char id[256];')
    print(' char major;')
    print(' char minor;')
    print(' char step;')
    print("};")
    print('')
    print(f'// Automatically generated on {date.today()} from "{src}"')
    print(f'struct target targets[{len(targets)}];')
    for i, itm in enumerate(targets):
        assert isinstance(itm, tuple)
        print(f'strcpy(targets[{i}].id, "{itm[0]}");')
        print(f'targets[{i}].major = {itm[1]};')
        print(f'targets[{i}].minor = {itm[2]};')
        print(f'targets[{i}].step = {itm[3]};')

View File

@ -143,6 +143,7 @@ if (NOT GGML_BACKEND_DL)
llama_target_and_test(test-rope.cpp)
endif()
# llama_target_and_test(test-parse-amd-ids.c)
# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)

605
tests/test-parse-amd-ids.c Normal file
View File

@ -0,0 +1,605 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include "ggml-cuda/ggml-cuda.cu"
//ToDo: This needs to be properly integrated
// Base value added to every AMD architecture ID returned by ggml_cuda_parse_id.
const int GGML_CUDA_CC_OFFSET_AMD = 0x1000000;

// One reference entry: a target ID string and the version triple it stands for.
struct target {
    char id[256]; // target ID, e.g. "gfx906:sramecc+:xnack-"
    char major;
    char minor;
    char step;
};

// Convert an AMD target ID string into a ggml architecture ID.
//
// Accepted forms, each optionally followed by ":feature" suffixes such as
// ":xnack-" or ":sramecc+":
//   gfx<major><minor><step>    e.g. "gfx906", "gfx90a", "gfx1030"
//   gfx<major>-generic         e.g. "gfx9-generic", "gfx11-generic"
//   gfx<major>-<minor>-generic e.g. "gfx9-4-generic", "gfx10-3-generic"
//
// The digits are read as hexadecimal, so "gfx1030" yields
// GGML_CUDA_CC_OFFSET_AMD + 0x1030. All gfx8 targets collapse to
// GGML_CUDA_CC_OFFSET_AMD + 0x800.
//
// devName is only read, never modified. If the name cannot be parsed (NULL,
// missing "gfx" prefix, too long, out-of-range fields) the bare
// GGML_CUDA_CC_OFFSET_AMD is returned.
int ggml_cuda_parse_id(char devName[]) {
    // A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
    // these values are not stable so this is susceptible to breakage
    // https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
    static const char prefix[]        = "gfx";
    static const char genericSuffix[] = "-generic";
    const size_t prefixLen = sizeof(prefix) - 1;
    const size_t suffixLen = sizeof(genericSuffix) - 1;
    const int    invalid   = GGML_CUDA_CC_OFFSET_AMD;

    // Reject anything that is not a gfx target instead of blindly skipping 3 bytes.
    if (devName == NULL || strncmp(devName, prefix, prefixLen) != 0) {
        return invalid;
    }

    // Everything from the first ':' on is a feature flag (:xnack-, :sramecc+, ...).
    const char * arch    = devName + prefixLen;
    const size_t archLen = strcspn(arch, ":");

    // Work on a bounded private copy: the caller's string stays untouched and
    // the buffer is always initialised before it is read.
    char archName[32];
    if (archLen >= sizeof(archName)) {
        return invalid;
    }
    memcpy(archName, arch, archLen);
    archName[archLen] = '\0';

    unsigned long archMajor = 0;
    unsigned long archMinor = 0;

    if (archLen > suffixLen) {
        // Generic targets use '-' as delimiter: "<major>[-<minor>]-generic".
        if (strcmp(archName + archLen - suffixLen, genericSuffix) == 0) {
            archName[archLen - suffixLen] = '\0';
            char * end = NULL;
            archMajor = strtoul(archName, &end, 16);
            if (end != archName && *end == '-') {
                archMinor = 0x10 * strtoul(end + 1, NULL, 16);
            }
        }
    } else if (archLen >= 3) {
        // The last two digits are minor * 0x10 + stepping ...
        archMinor = strtoul(&archName[archLen - 2], NULL, 16);
        archName[archLen - 2] = '\0';
        // ... and only the major version remains in front of them.
        archMajor = strtoul(archName, NULL, 16);
    }

    // Guard the arithmetic below against nonsensical input.
    if (archMajor > 0xff || archMinor > 0xff) {
        return invalid;
    }

    int archNum = invalid + (int) archMajor * 0x100;
    // be inclusive of the full gfx8 line for backward compatibility (Carrizo APUs, etc.)
    if (archMajor != 8) {
        archNum += (int) archMinor;
    }
    return archNum;
}
// Automatically generated 2025-01-17 from https://raw.githubusercontent.com/ROCm/clr/refs/heads/amd-staging/rocclr/device/device.cpp
int main() {
struct target targets[124];
strcpy(targets[0].id, "gfx801");
targets[0].major = 8;
targets[0].minor = 0;
targets[0].step = 1;
strcpy(targets[1].id, "gfx801:xnack-");
targets[1].major = 8;
targets[1].minor = 0;
targets[1].step = 1;
strcpy(targets[2].id, "gfx801:xnack+");
targets[2].major = 8;
targets[2].minor = 0;
targets[2].step = 1;
strcpy(targets[3].id, "gfx802");
targets[3].major = 8;
targets[3].minor = 0;
targets[3].step = 2;
strcpy(targets[4].id, "gfx803");
targets[4].major = 8;
targets[4].minor = 0;
targets[4].step = 3;
strcpy(targets[5].id, "gfx805");
targets[5].major = 8;
targets[5].minor = 0;
targets[5].step = 5;
strcpy(targets[6].id, "gfx810");
targets[6].major = 8;
targets[6].minor = 1;
targets[6].step = 0;
strcpy(targets[7].id, "gfx810:xnack-");
targets[7].major = 8;
targets[7].minor = 1;
targets[7].step = 0;
strcpy(targets[8].id, "gfx810:xnack+");
targets[8].major = 8;
targets[8].minor = 1;
targets[8].step = 0;
strcpy(targets[9].id, "gfx900");
targets[9].major = 9;
targets[9].minor = 0;
targets[9].step = 0;
strcpy(targets[10].id, "gfx900:xnack-");
targets[10].major = 9;
targets[10].minor = 0;
targets[10].step = 0;
strcpy(targets[11].id, "gfx900:xnack+");
targets[11].major = 9;
targets[11].minor = 0;
targets[11].step = 0;
strcpy(targets[12].id, "gfx902");
targets[12].major = 9;
targets[12].minor = 0;
targets[12].step = 2;
strcpy(targets[13].id, "gfx902:xnack-");
targets[13].major = 9;
targets[13].minor = 0;
targets[13].step = 2;
strcpy(targets[14].id, "gfx902:xnack+");
targets[14].major = 9;
targets[14].minor = 0;
targets[14].step = 2;
strcpy(targets[15].id, "gfx904");
targets[15].major = 9;
targets[15].minor = 0;
targets[15].step = 4;
strcpy(targets[16].id, "gfx904:xnack-");
targets[16].major = 9;
targets[16].minor = 0;
targets[16].step = 4;
strcpy(targets[17].id, "gfx904:xnack+");
targets[17].major = 9;
targets[17].minor = 0;
targets[17].step = 4;
strcpy(targets[18].id, "gfx906");
targets[18].major = 9;
targets[18].minor = 0;
targets[18].step = 6;
strcpy(targets[19].id, "gfx906:sramecc-");
targets[19].major = 9;
targets[19].minor = 0;
targets[19].step = 6;
strcpy(targets[20].id, "gfx906:sramecc+");
targets[20].major = 9;
targets[20].minor = 0;
targets[20].step = 6;
strcpy(targets[21].id, "gfx906:xnack-");
targets[21].major = 9;
targets[21].minor = 0;
targets[21].step = 6;
strcpy(targets[22].id, "gfx906:xnack+");
targets[22].major = 9;
targets[22].minor = 0;
targets[22].step = 6;
strcpy(targets[23].id, "gfx906:sramecc-:xnack-");
targets[23].major = 9;
targets[23].minor = 0;
targets[23].step = 6;
strcpy(targets[24].id, "gfx906:sramecc-:xnack+");
targets[24].major = 9;
targets[24].minor = 0;
targets[24].step = 6;
strcpy(targets[25].id, "gfx906:sramecc+:xnack-");
targets[25].major = 9;
targets[25].minor = 0;
targets[25].step = 6;
strcpy(targets[26].id, "gfx906:sramecc+:xnack+");
targets[26].major = 9;
targets[26].minor = 0;
targets[26].step = 6;
strcpy(targets[27].id, "gfx908");
targets[27].major = 9;
targets[27].minor = 0;
targets[27].step = 8;
strcpy(targets[28].id, "gfx908:sramecc-");
targets[28].major = 9;
targets[28].minor = 0;
targets[28].step = 8;
strcpy(targets[29].id, "gfx908:sramecc+");
targets[29].major = 9;
targets[29].minor = 0;
targets[29].step = 8;
strcpy(targets[30].id, "gfx908:xnack-");
targets[30].major = 9;
targets[30].minor = 0;
targets[30].step = 8;
strcpy(targets[31].id, "gfx908:xnack+");
targets[31].major = 9;
targets[31].minor = 0;
targets[31].step = 8;
strcpy(targets[32].id, "gfx908:sramecc-:xnack-");
targets[32].major = 9;
targets[32].minor = 0;
targets[32].step = 8;
strcpy(targets[33].id, "gfx908:sramecc-:xnack+");
targets[33].major = 9;
targets[33].minor = 0;
targets[33].step = 8;
strcpy(targets[34].id, "gfx908:sramecc+:xnack-");
targets[34].major = 9;
targets[34].minor = 0;
targets[34].step = 8;
strcpy(targets[35].id, "gfx908:sramecc+:xnack+");
targets[35].major = 9;
targets[35].minor = 0;
targets[35].step = 8;
strcpy(targets[36].id, "gfx909");
targets[36].major = 9;
targets[36].minor = 0;
targets[36].step = 2;
strcpy(targets[37].id, "gfx909:xnack-");
targets[37].major = 9;
targets[37].minor = 0;
targets[37].step = 2;
strcpy(targets[38].id, "gfx909:xnack+");
targets[38].major = 9;
targets[38].minor = 0;
targets[38].step = 2;
strcpy(targets[39].id, "gfx90a");
targets[39].major = 9;
targets[39].minor = 0;
targets[39].step = 10;
strcpy(targets[40].id, "gfx90a:sramecc-");
targets[40].major = 9;
targets[40].minor = 0;
targets[40].step = 10;
strcpy(targets[41].id, "gfx90a:sramecc+");
targets[41].major = 9;
targets[41].minor = 0;
targets[41].step = 10;
strcpy(targets[42].id, "gfx90a:xnack-");
targets[42].major = 9;
targets[42].minor = 0;
targets[42].step = 10;
strcpy(targets[43].id, "gfx90a:xnack+");
targets[43].major = 9;
targets[43].minor = 0;
targets[43].step = 10;
strcpy(targets[44].id, "gfx90a:sramecc-:xnack-");
targets[44].major = 9;
targets[44].minor = 0;
targets[44].step = 10;
strcpy(targets[45].id, "gfx90a:sramecc-:xnack+");
targets[45].major = 9;
targets[45].minor = 0;
targets[45].step = 10;
strcpy(targets[46].id, "gfx90a:sramecc+:xnack-");
targets[46].major = 9;
targets[46].minor = 0;
targets[46].step = 10;
strcpy(targets[47].id, "gfx90a:sramecc+:xnack+");
targets[47].major = 9;
targets[47].minor = 0;
targets[47].step = 10;
strcpy(targets[48].id, "gfx940");
targets[48].major = 9;
targets[48].minor = 4;
targets[48].step = 0;
strcpy(targets[49].id, "gfx940:sramecc-");
targets[49].major = 9;
targets[49].minor = 4;
targets[49].step = 0;
strcpy(targets[50].id, "gfx940:sramecc+");
targets[50].major = 9;
targets[50].minor = 4;
targets[50].step = 0;
strcpy(targets[51].id, "gfx940:xnack-");
targets[51].major = 9;
targets[51].minor = 4;
targets[51].step = 0;
strcpy(targets[52].id, "gfx940:xnack+");
targets[52].major = 9;
targets[52].minor = 4;
targets[52].step = 0;
strcpy(targets[53].id, "gfx940:sramecc-:xnack-");
targets[53].major = 9;
targets[53].minor = 4;
targets[53].step = 0;
strcpy(targets[54].id, "gfx940:sramecc-:xnack+");
targets[54].major = 9;
targets[54].minor = 4;
targets[54].step = 0;
strcpy(targets[55].id, "gfx940:sramecc+:xnack-");
targets[55].major = 9;
targets[55].minor = 4;
targets[55].step = 0;
strcpy(targets[56].id, "gfx940:sramecc+:xnack+");
targets[56].major = 9;
targets[56].minor = 4;
targets[56].step = 0;
strcpy(targets[57].id, "gfx941");
targets[57].major = 9;
targets[57].minor = 4;
targets[57].step = 1;
strcpy(targets[58].id, "gfx941:sramecc-");
targets[58].major = 9;
targets[58].minor = 4;
targets[58].step = 1;
strcpy(targets[59].id, "gfx941:sramecc+");
targets[59].major = 9;
targets[59].minor = 4;
targets[59].step = 1;
strcpy(targets[60].id, "gfx941:xnack-");
targets[60].major = 9;
targets[60].minor = 4;
targets[60].step = 1;
strcpy(targets[61].id, "gfx941:xnack+");
targets[61].major = 9;
targets[61].minor = 4;
targets[61].step = 1;
strcpy(targets[62].id, "gfx941:sramecc-:xnack-");
targets[62].major = 9;
targets[62].minor = 4;
targets[62].step = 1;
strcpy(targets[63].id, "gfx941:sramecc-:xnack+");
targets[63].major = 9;
targets[63].minor = 4;
targets[63].step = 1;
strcpy(targets[64].id, "gfx941:sramecc+:xnack-");
targets[64].major = 9;
targets[64].minor = 4;
targets[64].step = 1;
strcpy(targets[65].id, "gfx941:sramecc+:xnack+");
targets[65].major = 9;
targets[65].minor = 4;
targets[65].step = 1;
strcpy(targets[66].id, "gfx942");
targets[66].major = 9;
targets[66].minor = 4;
targets[66].step = 2;
strcpy(targets[67].id, "gfx942:sramecc-");
targets[67].major = 9;
targets[67].minor = 4;
targets[67].step = 2;
strcpy(targets[68].id, "gfx942:sramecc+");
targets[68].major = 9;
targets[68].minor = 4;
targets[68].step = 2;
strcpy(targets[69].id, "gfx942:xnack-");
targets[69].major = 9;
targets[69].minor = 4;
targets[69].step = 2;
strcpy(targets[70].id, "gfx942:xnack+");
targets[70].major = 9;
targets[70].minor = 4;
targets[70].step = 2;
strcpy(targets[71].id, "gfx942:sramecc-:xnack-");
targets[71].major = 9;
targets[71].minor = 4;
targets[71].step = 2;
strcpy(targets[72].id, "gfx942:sramecc-:xnack+");
targets[72].major = 9;
targets[72].minor = 4;
targets[72].step = 2;
strcpy(targets[73].id, "gfx942:sramecc+:xnack-");
targets[73].major = 9;
targets[73].minor = 4;
targets[73].step = 2;
strcpy(targets[74].id, "gfx942:sramecc+:xnack+");
targets[74].major = 9;
targets[74].minor = 4;
targets[74].step = 2;
strcpy(targets[75].id, "gfx90c");
targets[75].major = 9;
targets[75].minor = 0;
targets[75].step = 12;
strcpy(targets[76].id, "gfx90c:xnack-");
targets[76].major = 9;
targets[76].minor = 0;
targets[76].step = 12;
strcpy(targets[77].id, "gfx90c:xnack+");
targets[77].major = 9;
targets[77].minor = 0;
targets[77].step = 12;
strcpy(targets[78].id, "gfx9-generic");
targets[78].major = 9;
targets[78].minor = 0;
targets[78].step = 0;
strcpy(targets[79].id, "gfx9-generic:xnack-");
targets[79].major = 9;
targets[79].minor = 0;
targets[79].step = 0;
strcpy(targets[80].id, "gfx9-generic:xnack+");
targets[80].major = 9;
targets[80].minor = 0;
targets[80].step = 0;
strcpy(targets[81].id, "gfx9-4-generic");
targets[81].major = 9;
targets[81].minor = 4;
targets[81].step = 0;
strcpy(targets[82].id, "gfx9-4-generic:sramecc-");
targets[82].major = 9;
targets[82].minor = 4;
targets[82].step = 0;
strcpy(targets[83].id, "gfx9-4-generic:sramecc+");
targets[83].major = 9;
targets[83].minor = 4;
targets[83].step = 0;
strcpy(targets[84].id, "gfx9-4-generic:xnack-");
targets[84].major = 9;
targets[84].minor = 4;
targets[84].step = 0;
strcpy(targets[85].id, "gfx9-4-generic:xnack+");
targets[85].major = 9;
targets[85].minor = 4;
targets[85].step = 0;
strcpy(targets[86].id, "gfx9-4-generic:sramecc-:xnack-");
targets[86].major = 9;
targets[86].minor = 4;
targets[86].step = 0;
strcpy(targets[87].id, "gfx9-4-generic:sramecc-:xnack+");
targets[87].major = 9;
targets[87].minor = 4;
targets[87].step = 0;
strcpy(targets[88].id, "gfx9-4-generic:sramecc+:xnack-");
targets[88].major = 9;
targets[88].minor = 4;
targets[88].step = 0;
strcpy(targets[89].id, "gfx9-4-generic:sramecc+:xnack+");
targets[89].major = 9;
targets[89].minor = 4;
targets[89].step = 0;
strcpy(targets[90].id, "gfx1010");
targets[90].major = 10;
targets[90].minor = 1;
targets[90].step = 0;
strcpy(targets[91].id, "gfx1010:xnack-");
targets[91].major = 10;
targets[91].minor = 1;
targets[91].step = 0;
strcpy(targets[92].id, "gfx1010:xnack+");
targets[92].major = 10;
targets[92].minor = 1;
targets[92].step = 0;
strcpy(targets[93].id, "gfx1011");
targets[93].major = 10;
targets[93].minor = 1;
targets[93].step = 1;
strcpy(targets[94].id, "gfx1011:xnack-");
targets[94].major = 10;
targets[94].minor = 1;
targets[94].step = 1;
strcpy(targets[95].id, "gfx1011:xnack+");
targets[95].major = 10;
targets[95].minor = 1;
targets[95].step = 1;
strcpy(targets[96].id, "gfx1012");
targets[96].major = 10;
targets[96].minor = 1;
targets[96].step = 2;
strcpy(targets[97].id, "gfx1012:xnack-");
targets[97].major = 10;
targets[97].minor = 1;
targets[97].step = 2;
strcpy(targets[98].id, "gfx1012:xnack+");
targets[98].major = 10;
targets[98].minor = 1;
targets[98].step = 2;
strcpy(targets[99].id, "gfx1013");
targets[99].major = 10;
targets[99].minor = 1;
targets[99].step = 3;
strcpy(targets[100].id, "gfx1013:xnack-");
targets[100].major = 10;
targets[100].minor = 1;
targets[100].step = 3;
strcpy(targets[101].id, "gfx1013:xnack+");
targets[101].major = 10;
targets[101].minor = 1;
targets[101].step = 3;
strcpy(targets[102].id, "gfx10-1-generic");
targets[102].major = 10;
targets[102].minor = 1;
targets[102].step = 0;
strcpy(targets[103].id, "gfx10-1-generic:xnack-");
targets[103].major = 10;
targets[103].minor = 1;
targets[103].step = 0;
strcpy(targets[104].id, "gfx10-1-generic:xnack+");
targets[104].major = 10;
targets[104].minor = 1;
targets[104].step = 0;
strcpy(targets[105].id, "gfx1030");
targets[105].major = 10;
targets[105].minor = 3;
targets[105].step = 0;
strcpy(targets[106].id, "gfx1031");
targets[106].major = 10;
targets[106].minor = 3;
targets[106].step = 1;
strcpy(targets[107].id, "gfx1032");
targets[107].major = 10;
targets[107].minor = 3;
targets[107].step = 2;
strcpy(targets[108].id, "gfx1033");
targets[108].major = 10;
targets[108].minor = 3;
targets[108].step = 3;
strcpy(targets[109].id, "gfx1034");
targets[109].major = 10;
targets[109].minor = 3;
targets[109].step = 4;
strcpy(targets[110].id, "gfx1035");
targets[110].major = 10;
targets[110].minor = 3;
targets[110].step = 5;
strcpy(targets[111].id, "gfx1036");
targets[111].major = 10;
targets[111].minor = 3;
targets[111].step = 6;
strcpy(targets[112].id, "gfx10-3-generic");
targets[112].major = 10;
targets[112].minor = 3;
targets[112].step = 0;
strcpy(targets[113].id, "gfx1100");
targets[113].major = 11;
targets[113].minor = 0;
targets[113].step = 0;
strcpy(targets[114].id, "gfx1101");
targets[114].major = 11;
targets[114].minor = 0;
targets[114].step = 1;
strcpy(targets[115].id, "gfx1102");
targets[115].major = 11;
targets[115].minor = 0;
targets[115].step = 2;
strcpy(targets[116].id, "gfx1103");
targets[116].major = 11;
targets[116].minor = 0;
targets[116].step = 3;
strcpy(targets[117].id, "gfx1150");
targets[117].major = 11;
targets[117].minor = 5;
targets[117].step = 0;
strcpy(targets[118].id, "gfx1151");
targets[118].major = 11;
targets[118].minor = 5;
targets[118].step = 1;
strcpy(targets[119].id, "gfx1152");
targets[119].major = 11;
targets[119].minor = 5;
targets[119].step = 2;
strcpy(targets[120].id, "gfx11-generic");
targets[120].major = 11;
targets[120].minor = 0;
targets[120].step = 0;
strcpy(targets[121].id, "gfx1200");
targets[121].major = 12;
targets[121].minor = 0;
targets[121].step = 0;
strcpy(targets[122].id, "gfx1201");
targets[122].major = 12;
targets[122].minor = 0;
targets[122].step = 1;
strcpy(targets[123].id, "gfx12-generic");
targets[123].major = 12;
targets[123].minor = 0;
targets[123].step = 0;
int verReturned;
int verActual;
char * result;
char pass[] = "OK ";
char fail[] = "FAIL";
int total = 0;
int good = 0;
for (int i = 0; i < sizeof(targets) / sizeof(struct target); i++) {
result = fail;
total += 1;
verActual = (targets[i].major % 10) * 0x10 * 0x10;
verActual += (targets[i].major / 10) * 0x10 * 0x100;
if (targets[i].major != 8) {
verActual += targets[i].minor * 0x10;
verActual += targets[i].step;
}
verReturned = ggml_cuda_parse_id(targets[i].id);
if (verActual + GGML_CUDA_CC_OFFSET_AMD == verReturned) {
result = pass;
good += 1;
} else {
// gfx909 is mapped to 902
if (verActual == 0x902 && (verReturned & 0xffff) == 0x909) {
result = pass;
good += 1;
}
}
printf("%03d: %s: Actual: 0x%04x, Returned: 0x%04x, ID: %s\n",
i, result, verActual, verReturned & 0xffff, targets[i].id);
}
printf("Total: %d Passed: %d Failed: %d\n", total, good, total - good);
return total - good;
}