mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 05:42:22 +01:00
convert : use f32 outtype for bf16 tensors (#6106)
The old behaviour is to use f16, but bf16 to f16 is not a lossless conversion. Change the outtype to f32 to default to a lossless conversion.
This commit is contained in:
parent
d01b3c4c32
commit
3a6efdd03c
@ -1167,9 +1167,9 @@ class OutputFile:
|
|||||||
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
|
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
|
||||||
wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type
|
wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type
|
||||||
|
|
||||||
if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
|
if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
|
||||||
return GGMLFileType.AllF32
|
return GGMLFileType.AllF32
|
||||||
if output_type_str == "f16" or (output_type_str is None and wq_type in (DT_F16, DT_BF16)):
|
if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
|
||||||
return GGMLFileType.MostlyF16
|
return GGMLFileType.MostlyF16
|
||||||
if output_type_str == "q8_0":
|
if output_type_str == "q8_0":
|
||||||
return GGMLFileType.MostlyQ8_0
|
return GGMLFileType.MostlyQ8_0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user