From a279f178156a6474b61a1a38639cc5ca93d05b22 Mon Sep 17 00:00:00 2001 From: pancake Date: Tue, 22 Oct 2024 18:51:03 +0200 Subject: [PATCH] feat(convert_hf_to_gguf): support q4_0 and q4_1 quantifications --- convert_hf_to_gguf.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7e552a71b..53725afa3 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -357,6 +357,10 @@ class Model: data_qtype = gguf.GGMLQuantizationType.TQ1_0 elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0: data_qtype = gguf.GGMLQuantizationType.TQ2_0 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0: + data_qtype = gguf.GGMLQuantizationType.Q4_0 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1: + data_qtype = gguf.GGMLQuantizationType.Q4_1 else: raise ValueError(f"Unknown file type: {self.ftype.name}") @@ -4293,8 +4297,8 @@ def parse_args() -> argparse.Namespace: help="path to write to; default: based on input. {ftype} will be replaced by the outtype.", ) parser.add_argument( - "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16", - help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type", + "--outtype", type=str, choices=["f32", "f16", "bf16", "q4_0", "q4_1", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16", + help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q4_0, q4_1 , q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type", ) parser.add_argument( "--bigendian", action="store_true", @@ -4380,6 +4384,8 @@ def main() -> None: "f32": gguf.LlamaFileType.ALL_F32, "f16": gguf.LlamaFileType.MOSTLY_F16, "bf16": gguf.LlamaFileType.MOSTLY_BF16, + "q4_0": gguf.LlamaFileType.MOSTLY_Q4_0, + "q4_1": gguf.LlamaFileType.MOSTLY_Q4_1, "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0, "tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0, "tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,