mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
cuda : re-add q4_0
This commit is contained in:
parent
503983a69a
commit
62d7b6c87f
@@ -29,9 +29,9 @@ void ggml_cuda_op_mul_mat_q(
     const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst};
 
     switch (src0->type) {
-        // case GGML_TYPE_Q4_0:
-        // mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
-        // break;
+        case GGML_TYPE_Q4_0:
+            mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
+            break;
         // case GGML_TYPE_Q4_1:
         // mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
         // break;
|
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q4_0);
|
Loading…
Reference in New Issue
Block a user