HIP: force max threads per block to be 1024 (#11621)

Some old or vendor-forked versions of LLVM still default to 256 threads per block. Explicitly set the limit to 1024 to align with upstream LLVM.

Signed-off-by: fxzjshm <fxzjshm@163.com>
This commit is contained in:
fxzjshm 2025-02-05 02:18:38 +08:00 committed by GitHub
parent 3962fc1a79
commit 3ec9fd4b77
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -46,6 +46,9 @@ endif()
message(STATUS "HIP and hipBLAS found")

# Pin the HIP per-block thread limit to 1024. Some old or vendor-forked LLVM
# toolchains still use 256, so pass the value explicitly to match upstream LLVM.
string(APPEND CMAKE_HIP_FLAGS " --gpu-max-threads-per-block=1024")

# Header list for the ROCm build: every .cuh in the sibling ggml-cuda directory,
# plus the public ggml-cuda.h header.
file(GLOB GGML_HEADERS_ROCM "../ggml-cuda/*.cuh")
list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h")