mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 22:08:55 +01:00
parent
a4200cafad
commit
5d9e59979c
@ -317,8 +317,6 @@ static bool ggml_backend_amx_device_supports_op(ggml_backend_dev_t dev, const st
|
||||
const enum ggml_type type = src0->type;
|
||||
const int64_t ne0 = op->ne[0];
|
||||
|
||||
bool is_training = src0->grad || src1->grad;
|
||||
|
||||
// amx kernels enables for Q4_0, Q4_1, Q8_0, F16
|
||||
// Q4_K, Q5_K, Q6_K, IQ4_XS enabled for QK_K = 256
|
||||
bool has_amx_kernels = qtype_has_amx_kernels(type) || (type == GGML_TYPE_F16);
|
||||
@ -326,7 +324,6 @@ static bool ggml_backend_amx_device_supports_op(ggml_backend_dev_t dev, const st
|
||||
bool can_use_amx =
|
||||
is_contiguous_2d(src0) && // src0 must be contiguous
|
||||
is_contiguous_2d(src1) && // src1 must be contiguous
|
||||
!is_training && // inference only
|
||||
src1->type == GGML_TYPE_F32 && // src1 must be float32
|
||||
has_amx_kernels && // with amx kernel impls
|
||||
ne0 % (TILE_N * 2) == 0; // out_features is 32x
|
||||
|
Loading…
Reference in New Issue
Block a user