mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 21:10:24 +01:00
cuda : re-add q4_0
This commit is contained in:
parent
503983a69a
commit
62d7b6c87f
@@ -29,9 +29,9 @@ void ggml_cuda_op_mul_mat_q(
|
||||
const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst};
|
||||
|
||||
switch (src0->type) {
|
||||
// case GGML_TYPE_Q4_0:
|
||||
// mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
|
||||
// break;
|
||||
case GGML_TYPE_Q4_0:
|
||||
mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
|
||||
break;
|
||||
// case GGML_TYPE_Q4_1:
|
||||
// mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
|
||||
// break;
|
||||
|
@@ -0,0 +1,5 @@
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../mmq.cuh"
|
||||
|
||||
DECL_MMQ_CASE(GGML_TYPE_Q4_0);
|
Loading…
x
Reference in New Issue
Block a user