HIP: force max threads per block to be 1024 (#11621)

Some old or vendor-forked versions of LLVM still use 256 as the maximum number of threads per block. Explicitly set it to 1024 to align with upstream LLVM. Signed-off-by: fxzjshm <fxzjshm@163.com>
2025-02-05 16:10:42 +01:00 · 2025-02-05 02:18:38 +08:00 · 2025-02-05 02:18:38 +08:00 · 3ec9fd4b77
commit 3ec9fd4b77
parent 3962fc1a79
1 changed files with 3 additions and 0 deletions
--- a/ggml/src/ggml-hip/CMakeLists.txt
+++ b/ggml/src/ggml-hip/CMakeLists.txt
@@ -46,6 +46,9 @@ endif()

 message(STATUS "HIP and hipBLAS found")

+# Workaround for old/vendor-forked compilers that still use 256 max threads per block: force 1024 to match upstream LLVM
+set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} --gpu-max-threads-per-block=1024")
+
 file(GLOB   GGML_HEADERS_ROCM "../ggml-cuda/*.cuh")
 list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h")