mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
cuda : fix LLAMA_CUDA_F16 (#5262)
This commit is contained in:
parent
d71ac90985
commit
8ca511cade
@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(
|
|||||||
|
|
||||||
if (src1_convert_f16) {
|
if (src1_convert_f16) {
|
||||||
src1_dfloat = src1_dfloat_a.alloc(ne00);
|
src1_dfloat = src1_dfloat_a.alloc(ne00);
|
||||||
ggml_cpy_f32_f16_cuda((const char *) src1_ddf_i, (char *) src1_dfloat, ne00,
|
const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
|
||||||
ne00, 1, sizeof(float), 0, 0,
|
GGML_ASSERT(to_fp16_cuda != nullptr);
|
||||||
ne00, 1, sizeof(half), 0, 0, stream);
|
to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion
|
const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion
|
||||||
|
Loading…
Reference in New Issue
Block a user