mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-02-05 16:10:42 +01:00
Hip: disable VMM on hip as it seems that it doesn't work in some configurations (#11420)
This commit is contained in:
parent
ca6baf76c1
commit
26771a1491
@ -155,6 +155,7 @@ option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp on
|
|||||||
|
|
||||||
option(GGML_HIP "ggml: use HIP" OFF)
|
option(GGML_HIP "ggml: use HIP" OFF)
|
||||||
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
||||||
|
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
||||||
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
||||||
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
||||||
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
||||||
|
@ -131,6 +131,10 @@ typedef float dfloat; // dequantize float
|
|||||||
typedef float2 dfloat2;
|
typedef float2 dfloat2;
|
||||||
#endif // GGML_CUDA_F16
|
#endif // GGML_CUDA_F16
|
||||||
|
|
||||||
|
#if (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
|
||||||
|
#define GGML_USE_VMM
|
||||||
|
#endif // (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
|
||||||
|
|
||||||
#if (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
|
#if (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
|
||||||
#define FP16_AVAILABLE
|
#define FP16_AVAILABLE
|
||||||
#endif // (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
|
#endif // (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
|
||||||
|
@ -152,7 +152,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||||||
for (int id = 0; id < info.device_count; ++id) {
|
for (int id = 0; id < info.device_count; ++id) {
|
||||||
int device_vmm = 0;
|
int device_vmm = 0;
|
||||||
|
|
||||||
#if !defined(GGML_CUDA_NO_VMM)
|
#if defined(GGML_USE_VMM)
|
||||||
CUdevice device;
|
CUdevice device;
|
||||||
CU_CHECK(cuDeviceGet(&device, id));
|
CU_CHECK(cuDeviceGet(&device, id));
|
||||||
CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
|
CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
|
||||||
@ -164,7 +164,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||||||
alloc_prop.location.id = id;
|
alloc_prop.location.id = id;
|
||||||
CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
|
CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
|
||||||
}
|
}
|
||||||
#endif // !defined(GGML_CUDA_NO_VMM)
|
#endif // defined(GGML_USE_VMM)
|
||||||
info.devices[id].vmm = !!device_vmm;
|
info.devices[id].vmm = !!device_vmm;
|
||||||
|
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
@ -300,7 +300,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// pool with virtual memory
|
// pool with virtual memory
|
||||||
#if !defined(GGML_CUDA_NO_VMM)
|
#if defined(GGML_USE_VMM)
|
||||||
struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
|
struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
|
||||||
static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
|
static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
|
||||||
|
|
||||||
@ -408,14 +408,14 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
|
|||||||
GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
|
GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif // !defined(GGML_CUDA_NO_VMM)
|
#endif // defined(GGML_USE_VMM)
|
||||||
|
|
||||||
std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
|
std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
|
||||||
#if !defined(GGML_CUDA_NO_VMM)
|
#if defined(GGML_USE_VMM)
|
||||||
if (ggml_cuda_info().devices[device].vmm) {
|
if (ggml_cuda_info().devices[device].vmm) {
|
||||||
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
|
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
|
||||||
}
|
}
|
||||||
#endif // !defined(GGML_CUDA_NO_VMM)
|
#endif // defined(GGML_USE_VMM)
|
||||||
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
|
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3250,7 +3250,7 @@ static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t
|
|||||||
features.push_back({ "FORCE_CUBLAS", "1" });
|
features.push_back({ "FORCE_CUBLAS", "1" });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_CUDA_NO_VMM
|
#ifndef GGML_USE_VMM
|
||||||
features.push_back({ "NO_VMM", "1" });
|
features.push_back({ "NO_VMM", "1" });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -96,8 +96,8 @@ if (GGML_HIP_GRAPHS)
|
|||||||
add_compile_definitions(GGML_HIP_GRAPHS)
|
add_compile_definitions(GGML_HIP_GRAPHS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (GGML_CUDA_NO_VMM)
|
if (GGML_HIP_NO_VMM)
|
||||||
add_compile_definitions(GGML_CUDA_NO_VMM)
|
add_compile_definitions(GGML_HIP_NO_VMM)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (CXX_IS_HIPCC)
|
if (CXX_IS_HIPCC)
|
||||||
|
Loading…
Reference in New Issue
Block a user