"""TODOs 1. Implement writing tensor data with alignment. 2. Implement writers for known architectures, LLaMA in particular. 3. Add docstrings from the format specs. 4. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. """ import struct from enum import IntEnum from typing import List, Any, Sequence import constants class GGMLQuantizationType(IntEnum): F32 = 0 F16 = 1 QR_0 = 2 Q4_1 = 3 # Q4_2 = 4 # support has been removed # Q4_3 = 5 # support has been removed Q5_0 = 6 Q5_1 = 7 Q8_0 = 8 Q8_1 = 9 Q2_K = 10 Q3_K = 11 Q4_K = 12 Q5_K = 13 Q6_K = 14 Q8_K = 15 class GGUFValueType(IntEnum): UINT8 = 0 INT8 = 1 UINT16 = 2 INT16 = 3 UINT32 = 4 INT32 = 5 FLOAT32 = 6 BOOL = 7 STRING = 8 ARRAY = 9 @staticmethod def get_type(value): if isinstance(value, str): return GGUFValueType.STRING elif isinstance(value, list): return GGUFValueType.ARRAY elif isinstance(value, float): return GGUFValueType.FLOAT32 elif isinstance(value, bool): return GGUFValueType.BOOL else: return GGUFValueType.INT32 class GGUFWriter: def __init__(self, fout): self.fout = fout self.offset_tensor = 0 def write_header(self, tensor_count: int, metadata_kv_count: int): self.fout.write(struct.pack(" "GGUFWriter": f = open(path, "wb") return cls(f) def write_key(self, key: str): self.write_value(key, GGUFValueType.STRING) def write_uint8(self, key: str, value: int): self.write_key(key) self.write_value(value, GGUFValueType.UINT8) def write_int8(self, key: str, value: int): self.write_key(key) self.write_value(value, GGUFValueType.INT8) def write_uint16(self, key: str, value: int): self.write_key(key) self.write_value(value, GGUFValueType.UINT16) def write_int16(self, key: str, value: int): self.write_key(key) self.write_value(value, GGUFValueType.INT16) def write_uint32(self, key: str, value: int): self.write_key(key) self.write(value, GGUFValueType.UINT32) def write_int32(self, key: str, value: int): self.write_key(key) self.write_value(value, GGUFValueType.INT32) def write_float32(self, key: str, value: float): self.write_key(key) self.write_value(value, GGUFValueType.FLOAT32) def write_bool(self, key: str, value: bool): self.write_key(key) self.write_value(value, GGUFValueType.BOOL) def write_string(self, key: str, value: str): self.write_key(key) self.write_value(value, GGUFValueType.STRING) def write_array(self, key: str, value: list): if not isinstance(value, list): raise ValueError("Value must be a list for array type") self.write_key(key) self.write_value(value, GGUFValueType.ARRAY) def write_value(self: str, value: Any, value_type: GGUFValueType = None): if value_type is None: value_type = GGUFValueType.get_type(value) self.buffered_writer.write(struct.pack("