"""TODOs 1. Implement writing tensor data with alignment. 2. Implement writers for known architectures, LLaMA in particular. 3. Add docstrings from the format specs. 4. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. """ import struct import constants from enum import IntEnum from typing import Any, IO, List, Sequence import numpy as np class GGMLQuantizationType(IntEnum): F32 = 0 F16 = 1 QR_0 = 2 Q4_1 = 3 # Q4_2 = 4 # support has been removed # Q4_3 = 5 # support has been removed Q5_0 = 6 Q5_1 = 7 Q8_0 = 8 Q8_1 = 9 Q2_K = 10 Q3_K = 11 Q4_K = 12 Q5_K = 13 Q6_K = 14 Q8_K = 15 class GGUFValueType(IntEnum): UINT8 = 0 INT8 = 1 UINT16 = 2 INT16 = 3 UINT32 = 4 INT32 = 5 FLOAT32 = 6 BOOL = 7 STRING = 8 ARRAY = 9 @staticmethod def get_type(val): if isinstance(val, str): return GGUFValueType.STRING elif isinstance(val, list): return GGUFValueType.ARRAY elif isinstance(val, float): return GGUFValueType.FLOAT32 elif isinstance(val, bool): return GGUFValueType.BOOL else: return GGUFValueType.INT32 class GGUFWriter: def __init__(self, fout: IO): self.fout = fout self.offset_tensor = 0 self.tensors = [] def write_header(self, tensor_count: int, metadata_kv_count: int): self.fout.write(struct.pack(" "GGUFWriter": f = open(path, "wb") return cls(f) def write_key(self, key: str): self.write_val(key, GGUFValueType.STRING) def write_uint8(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.UINT8) def write_int8(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.INT8) def write_uint16(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.UINT16) def write_int16(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.INT16) def write_uint32(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.UINT32) def write_int32(self, key: str, val: int): self.write_key(key) self.write_val(val, GGUFValueType.INT32) def write_float32(self, key: str, val: float): self.write_key(key) self.write_val(val, GGUFValueType.FLOAT32) def write_bool(self, key: str, val: bool): self.write_key(key) self.write_val(val, GGUFValueType.BOOL) def write_string(self, key: str, val: str): self.write_key(key) self.write_val(val, GGUFValueType.STRING) def write_array(self, key: str, val: list): if not isinstance(val, list): raise ValueError("Value must be a list for array type") self.write_key(key) self.write_val(val, GGUFValueType.ARRAY) def write_val(self: str, val: Any, vtype: GGUFValueType = None): if vtype is None: vtype = GGUFValueType.get_type(val) self.buffered_writer.write(struct.pack(" int: return ((x + n - 1) // n) * n def write_tensor_info(self, name: str, tensor: np.ndarray): self.write_val(key, GGUFValueType.STRING) n_dims = len(tensor.shape) self.write_val(n_dims, GGUFValueType.INT32) for i in range(n_dims): self.write_val(tensor.shape[N_dims - 1 - i], GGUFValueType.INT32) dtype = GGMLQuantizationType.F32 if tensor.dtype == np.float32 else GGMLQuantizationType.F16 self.write_val(dtype, GGUFValueType.INT32) self.fout.write(struct.pack("