diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 66036bdd1..2c10bbd37 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -29,9 +29,9 @@ void ggml_cuda_op_mul_mat_q( const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst}; switch (src0->type) { -// case GGML_TYPE_Q4_0: -// mul_mat_q_case(ctx, args, stream); -// break; + case GGML_TYPE_Q4_0: + mul_mat_q_case(ctx, args, stream); + break; // case GGML_TYPE_Q4_1: // mul_mat_q_case(ctx, args, stream); // break; diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu new file mode 100644 index 000000000..0c0b0c8a8 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_0);