From 464192b9be263a916555870598086e99d8c2e449 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=2E=20Yusuf=20Sar=C4=B1g=C3=B6z?=
Date: Thu, 27 Jul 2023 22:25:04 +0300
Subject: [PATCH] WIP: Write tensor

---
 constants.py |  5 ++--
 gguf.py      | 68 ++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/constants.py b/constants.py
index 3a97460e5..34880bb20 100644
--- a/constants.py
+++ b/constants.py
@@ -1,5 +1,6 @@
-GGUF_MAGIC = 0x47475546
-GGUF_VERSION = 1
+GGUF_MAGIC             = 0x47475546
+GGUF_VERSION           = 1
+GGUF_DEFAULT_ALIGNMENT = 32
 
 # general
 KEY_GENERAL_ARCHITECTURE = "general.architecture"
diff --git a/gguf.py b/gguf.py
index 764ae9a9d..1f24cb9d5 100644
--- a/gguf.py
+++ b/gguf.py
@@ -8,11 +8,14 @@
 import struct
 import constants
 from enum import IntEnum
-from typing import List, Any
+from typing import Any, IO, List, Sequence
+
+import numpy as np
+
 
 class GGMLQuantizationType(IntEnum):
-    F32 = 0
-    F16 = 1
+    F32  = 0
+    F16  = 1
     Q4_0 = 2
     Q4_1 = 3
     # Q4_2 = 4 # support has been removed
@@ -30,16 +33,16 @@ class GGMLQuantizationType(IntEnum):
 
 
 class GGUFValueType(IntEnum):
-    UINT8 = 0
-    INT8 = 1
-    UINT16 = 2
-    INT16 = 3
-    UINT32 = 4
-    INT32 = 5
+    UINT8   = 0
+    INT8    = 1
+    UINT16  = 2
+    INT16   = 3
+    UINT32  = 4
+    INT32   = 5
     FLOAT32 = 6
-    BOOL = 7
-    STRING = 8
-    ARRAY = 9
+    BOOL    = 7
+    STRING  = 8
+    ARRAY   = 9
 
     @staticmethod
     def get_type(val):
@@ -54,15 +57,18 @@ class GGUFValueType(IntEnum):
         else:
             return GGUFValueType.INT32
 
+
 class GGUFWriter:
-    def __init__(self, buffered_writer):
-        self.buffered_writer = buffered_writer
+    def __init__(self, fout: IO):
+        self.fout = fout
+        self.offset_tensor = 0
+        self.tensors = []
 
     def write_header(self, tensor_count: int, metadata_kv_count: int):
-        self.buffered_writer.write(struct.pack("<I", constants.GGUF_MAGIC))
-        self.buffered_writer.write(struct.pack("<I", constants.GGUF_VERSION))
-        self.buffered_writer.write(struct.pack("<I", tensor_count))
-        self.buffered_writer.write(struct.pack("<I", metadata_kv_count))
+        self.fout.write(struct.pack("<I", constants.GGUF_MAGIC))
+        self.fout.write(struct.pack("<I", constants.GGUF_VERSION))
+        self.fout.write(struct.pack("<I", tensor_count))
+        self.fout.write(struct.pack("<I", metadata_kv_count))
 
     @classmethod
     def open(cls, path: str) -> "GGUFWriter":
@@ -148,11 +154,33 @@ class GGUFWriter:
         else:
             raise ValueError("Invalid GGUF metadata value type")
 
+    @staticmethod
+    def ggml_pad(x: int, n: int) -> int:
+        return ((x + n - 1) // n) * n
+
+    def write_tensor_info(self, name: str, tensor: np.ndarray):
+        self.write_val(name, GGUFValueType.STRING)
+        n_dims = len(tensor.shape)
+        self.write_val(n_dims, GGUFValueType.INT32)
+        for i in range(n_dims):
+            self.write_val(tensor.shape[n_dims - 1 - i], GGUFValueType.INT32)
+
+        dtype = GGMLQuantizationType.F32 if tensor.dtype == np.float32 else GGMLQuantizationType.F16
+        self.write_val(dtype, GGUFValueType.INT32)
+        self.fout.write(struct.pack("<Q", self.offset_tensor))
+        self.offset_tensor += GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT)
+        self.tensors.append(tensor)
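
A note on the alignment math this patch introduces: ggml_pad() rounds a byte
count up to the next multiple of n, and write_tensor_info() uses it with
GGUF_DEFAULT_ALIGNMENT = 32 to advance offset_tensor past each tensor's data.
A worked instance of the formula (plain Python, mirroring the helper above):

    # ggml_pad(x, n) = ((x + n - 1) // n) * n, i.e. round x up to a multiple of n
    assert ((100 + 32 - 1) // 32) * 32 == 128  # a 100-byte tensor advances the offset to 128
    assert ((128 + 32 - 1) // 32) * 32 == 128  # already-aligned sizes are left unchanged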
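The patch is truncated near the end of write_tensor_info(), so the code that
flushes the queued arrays in self.tensors is not visible here. A minimal usage
sketch of the API as reconstructed above (the output path and tensor name are
illustrative, not from the patch):

    import numpy as np
    from gguf import GGUFWriter

    tensor = np.ones((32, 32), dtype=np.float32)  # toy F32 tensor, 4096 bytes

    writer = GGUFWriter.open("out.gguf")          # open() per the hunk context above
    writer.write_header(tensor_count=1, metadata_kv_count=0)
    writer.write_tensor_info("tok_embeddings.weight", tensor)  # name, dims, dtype, offset
    # The flush step (pad the file to the 32-byte boundary, then write each
    # queued array's bytes) falls in the truncated part of the patch.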