mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 21:37:19 +01:00
ggml : add max buffer sizes to opencl and metal backends (#5181)
This commit is contained in:
parent
172ac82629
commit
fbe7dfa53c
12
ggml-metal.m
12
ggml-metal.m
@ -2375,6 +2375,16 @@ GGML_CALL static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backen
|
|||||||
UNUSED(buft);
|
UNUSED(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the maximum buffer length reported by the Metal device.
//
// The device handle is obtained with ggml_backend_metal_get_device() and
// handed back with ggml_backend_metal_free_device() once the value has been
// read. The buffer type argument is not consulted.
GGML_CALL static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    UNUSED(buft);

    id<MTLDevice> mtl_device = ggml_backend_metal_get_device();
    const size_t max_buffer_length = mtl_device.maxBufferLength;
    ggml_backend_metal_free_device();

    return max_buffer_length;
}
|
||||||
|
|
||||||
GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
|
GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
|
||||||
return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
|
return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
|
||||||
|
|
||||||
@ -2393,7 +2403,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
|
|||||||
/* .get_name = */ ggml_backend_metal_buffer_type_get_name,
|
/* .get_name = */ ggml_backend_metal_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
|
||||||
/* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
|
/* .get_max_size = */ ggml_backend_metal_buffer_type_get_max_size,
|
||||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||||
/* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
|
||||||
/* .is_host = */ ggml_backend_metal_buffer_type_is_host,
|
/* .is_host = */ ggml_backend_metal_buffer_type_is_host,
|
||||||
|
@ -2125,6 +2125,15 @@ static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_
|
|||||||
GGML_UNUSED(buffer_type);
|
GGML_UNUSED(buffer_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the largest single allocation the OpenCL device supports
// (CL_DEVICE_MAX_MEM_ALLOC_SIZE), caching the value after the first
// successful query.
//
// The buffer type argument is not consulted.
//
// If the device query fails, (size_t)-1 is returned (the same value the
// previous implementation left in place when the query did not write a
// result) and the query is attempted again on the next call.
static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
    (void) buffer_type;

    static bool   have_max_size = false;
    static size_t max_size      = (size_t)-1;

    if (!have_max_size) {
        ggml_cl_init();

        // CL_DEVICE_MAX_MEM_ALLOC_SIZE is defined to return a cl_ulong; querying it
        // with sizeof(size_t) fails with CL_INVALID_VALUE where size_t is narrower.
        cl_ulong device_max = 0;
        if (clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_max), &device_max, NULL) != CL_SUCCESS) {
            return (size_t)-1;
        }

        // Clamp in case the device limit does not fit in size_t.
        if (device_max > (cl_ulong)((size_t)-1)) {
            max_size = (size_t)-1;
        } else {
            max_size = (size_t)device_max;
        }
        have_max_size = true;
    }

    return max_size;
}
|
||||||
|
|
||||||
static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
|
static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
|
||||||
//return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
|
//return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
|
||||||
return ggml_backend_is_cpu(backend);
|
return ggml_backend_is_cpu(backend);
|
||||||
@ -2136,7 +2145,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
|
|||||||
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
||||||
/* .get_max_size = */ NULL, // TODO: return from device info
|
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
|
||||||
/* .get_alloc_size = */ NULL,
|
/* .get_alloc_size = */ NULL,
|
||||||
/* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
|
||||||
/* .is_host = */ NULL,
|
/* .is_host = */ NULL,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user