diff --git a/ggml-cuda.cu b/ggml-cuda.cu index e56595742..3242a0b4a 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec( if (src1_convert_f16) { src1_dfloat = src1_dfloat_a.alloc(ne00); - ggml_cpy_f32_f16_cuda((const char *) src1_ddf_i, (char *) src1_dfloat, ne00, - ne00, 1, sizeof(float), 0, 0, - ne00, 1, sizeof(half), 0, 0, stream); + const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type); + GGML_ASSERT(to_fp16_cuda != nullptr); + to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream); } #else const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion