mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 06:39:25 +01:00
CUDA: no -sm row for very small matrices (#10185)
This commit is contained in:
parent
2a82891a85
commit
4a8ccb37ad
@ -2978,6 +2978,17 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|||||||
{
|
{
|
||||||
struct ggml_tensor * a = op->src[0];
|
struct ggml_tensor * a = op->src[0];
|
||||||
struct ggml_tensor * b = op->src[1];
|
struct ggml_tensor * b = op->src[1];
|
||||||
|
// for small weight matrices the active device can end up without any rows; don't use row split in those cases
|
||||||
|
// this avoids some edge cases (and the performance would not be good anyway)
|
||||||
|
if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) {
|
||||||
|
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context;
|
||||||
|
int64_t row_low;
|
||||||
|
int64_t row_high;
|
||||||
|
get_row_split(&row_low, &row_high, a, buft_ctx->tensor_split, dev_ctx->device);
|
||||||
|
if (row_low == row_high) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
|
if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user