From 62d7b6c87fe0056fe64115dedded8b78fddf14e3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 14 Aug 2024 13:37:03 +0300 Subject: [PATCH] cuda : re-add q4_0 --- ggml/src/ggml-cuda/mmq.cu | 6 +++--- ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 66036bdd1..2c10bbd37 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -29,9 +29,9 @@ void ggml_cuda_op_mul_mat_q( const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst}; switch (src0->type) { -// case GGML_TYPE_Q4_0: -// mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream); -// break; + case GGML_TYPE_Q4_0: + mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream); + break; // case GGML_TYPE_Q4_1: // mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream); // break; diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu new file mode 100644 index 000000000..0c0b0c8a8 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_0);