"""TODOs 1. Implement writers for known architectures, LLaMA in particular. 2. Add docstrings from the format specs. 3. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. """ import struct import constants from enum import IntEnum from typing import Any, IO, List import numpy as np class GGMLQuantizationType(IntEnum): F32 = 0 F16 = 1 QR_0 = 2 Q4_1 = 3 # Q4_2 = 4 # support has been removed # Q4_3 = 5 # support has been removed Q5_0 = 6 Q5_1 = 7 Q8_0 = 8 Q8_1 = 9 Q2_K = 10 Q3_K = 11 Q4_K = 12 Q5_K = 13 Q6_K = 14 Q8_K = 15 class GGUFValueType(IntEnum): UINT8 = 0 INT8 = 1 UINT16 = 2 INT16 = 3 UINT32 = 4 INT32 = 5 FLOAT32 = 6 BOOL = 7 STRING = 8 ARRAY = 9 @staticmethod def get_type(val): if isinstance(val, str): return GGUFValueType.STRING elif isinstance(val, list): return GGUFValueType.ARRAY elif isinstance(val, float): return GGUFValueType.FLOAT32 elif isinstance(val, bool): return GGUFValueType.BOOL else: return GGUFValueType.INT32 class GGUFWriter: def __init__(self, fout: IO): self.fout = fout self.offset_tensor = 0 self.tensors: List[np.ndarray] = [] def write_header(self, tensor_count: int, metadata_kv_count: int): self.fout.write(struct.pack(" "GGUFWriter": f = open(path, "wb") return cls(f) def write_key(self, key: str): self.write_val(key, GGUFValueType.STRING) def write_uint8(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.UINT8) def write_int8(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.INT8) def write_uint16(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.UINT16) def write_int16(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.INT16) def write_uint32(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.UINT32) def write_int32(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.INT32) def write_float32(self, key: str, val: float): self.write_key(key) self.write_val(val, GGUFValueType.FLOAT32) def write_bool(self, key: str, val: bool): self.write_key(key) self.write_val(val, GGUFValueType.BOOL) def write_string(self, key: str, val: str): self.write_key(key) self.write_val(val, GGUFValueType.STRING) def write_array(self, key: str, val: list): if not isinstance(val, list): raise ValueError("Value must be a list for array type") self.write_key(key) self.write_val(val, GGUFValueType.ARRAY) def write_val(self: str, val: Any, vtype: GGUFValueType = None): if vtype is None: vtype = GGUFValueType.get_type(val) self.fout.write(struct.pack(" int: return ((x + n - 1) // n) * n def write_tensor_info(self, name: str, tensor: np.ndarray): self.write_val(name, GGUFValueType.STRING) n_dims = len(tensor.shape) self.write_val(n_dims, GGUFValueType.INT32) for i in range(n_dims): self.write_val(tensor.shape[n_dims - 1 - i], GGUFValueType.INT32) assert tensor.dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now" dtype = GGMLQuantizationType.F32 if tensor.dtype == np.float32 else GGMLQuantizationType.F16 self.write_val(dtype, GGUFValueType.INT32) self.fout.write(struct.pack("