mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 13:58:46 +01:00
gguf : track writer state, free unneeded tensors, cleanup (#3871)
This commit is contained in:
parent
413503d4b9
commit
0a7c980b6f
@ -646,18 +646,17 @@ class GGUFValueType(IntEnum):
|
|||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
class WriterState(Enum):
|
||||||
|
EMPTY = auto()
|
||||||
|
HEADER = auto()
|
||||||
|
KV_DATA = auto()
|
||||||
|
TI_DATA = auto()
|
||||||
|
|
||||||
|
|
||||||
class GGUFWriter:
|
class GGUFWriter:
|
||||||
fout: BufferedWriter
|
fout: BufferedWriter
|
||||||
arch: str
|
temp_file: tempfile.SpooledTemporaryFile[bytes] | None
|
||||||
offset_tensor = 0
|
tensors: list[np.ndarray[Any, Any]]
|
||||||
data_alignment = GGUF_DEFAULT_ALIGNMENT
|
|
||||||
kv_data = b""
|
|
||||||
kv_data_count = 0
|
|
||||||
ti_data = b""
|
|
||||||
ti_data_count = 0
|
|
||||||
use_temp_file: bool
|
|
||||||
temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
|
|
||||||
tensors: list[tuple[np.ndarray[Any, Any], int]]
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pack_prefix(self):
|
def pack_prefix(self):
|
||||||
@ -683,27 +682,47 @@ class GGUFWriter:
|
|||||||
GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
|
GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
|
||||||
GGUFValueType.BOOL: "?" ,
|
GGUFValueType.BOOL: "?" ,
|
||||||
}
|
}
|
||||||
self.add_architecture()
|
self.offset_tensor = 0
|
||||||
|
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
|
||||||
|
self.kv_data = b""
|
||||||
|
self.kv_data_count = 0
|
||||||
|
self.ti_data = b""
|
||||||
|
self.ti_data_count = 0
|
||||||
self.use_temp_file = use_temp_file
|
self.use_temp_file = use_temp_file
|
||||||
|
self.temp_file = None
|
||||||
self.tensors = []
|
self.tensors = []
|
||||||
endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
|
endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
|
||||||
print(f"This gguf file is for {endianess_str} only")
|
print(f"This gguf file is for {endianess_str} only")
|
||||||
|
self.state = WriterState.EMPTY
|
||||||
|
|
||||||
|
self.add_architecture()
|
||||||
|
|
||||||
def write_header_to_file(self):
|
def write_header_to_file(self):
|
||||||
|
if self.state is not WriterState.EMPTY:
|
||||||
|
raise ValueError(f'Expected output file to be empty, got {self.state}')
|
||||||
|
|
||||||
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
||||||
self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
|
self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
|
||||||
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
|
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
|
||||||
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
|
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
|
||||||
self.flush()
|
self.flush()
|
||||||
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
self.state = WriterState.HEADER
|
||||||
|
|
||||||
def write_kv_data_to_file(self):
|
def write_kv_data_to_file(self):
|
||||||
|
if self.state is not WriterState.HEADER:
|
||||||
|
raise ValueError(f'Expected output file to contain the header, got {self.state}')
|
||||||
|
|
||||||
self.fout.write(self.kv_data)
|
self.fout.write(self.kv_data)
|
||||||
self.flush()
|
self.flush()
|
||||||
|
self.state = WriterState.KV_DATA
|
||||||
|
|
||||||
def write_ti_data_to_file(self):
|
def write_ti_data_to_file(self):
|
||||||
|
if self.state is not WriterState.KV_DATA:
|
||||||
|
raise ValueError(f'Expected output file to contain KV data, got {self.state}')
|
||||||
|
|
||||||
self.fout.write(self.ti_data)
|
self.fout.write(self.ti_data)
|
||||||
self.flush()
|
self.flush()
|
||||||
|
self.state = WriterState.TI_DATA
|
||||||
|
|
||||||
def add_key(self, key: str):
|
def add_key(self, key: str):
|
||||||
self.add_val(key, GGUFValueType.STRING, add_vtype=False)
|
self.add_val(key, GGUFValueType.STRING, add_vtype=False)
|
||||||
@ -796,6 +815,9 @@ class GGUFWriter:
|
|||||||
return ((x + n - 1) // n) * n
|
return ((x + n - 1) // n) * n
|
||||||
|
|
||||||
def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None):
|
def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None):
|
||||||
|
if self.state is not WriterState.EMPTY:
|
||||||
|
raise ValueError(f'Expected output file to be empty, got {self.state}')
|
||||||
|
|
||||||
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
||||||
|
|
||||||
encoded_name = name.encode("utf8")
|
encoded_name = name.encode("utf8")
|
||||||
@ -825,23 +847,22 @@ class GGUFWriter:
|
|||||||
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
|
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
|
||||||
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
|
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
|
||||||
|
|
||||||
pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
|
if self.temp_file is None:
|
||||||
|
self.tensors.append(tensor)
|
||||||
if self.temp_file is None:
|
|
||||||
self.tensors.append((tensor, pad))
|
|
||||||
return
|
return
|
||||||
|
|
||||||
tensor.tofile(self.temp_file)
|
tensor.tofile(self.temp_file)
|
||||||
|
self.write_padding(self.temp_file, tensor.nbytes)
|
||||||
|
|
||||||
if pad != 0:
|
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None):
|
||||||
self.temp_file.write(bytes([0] * pad))
|
|
||||||
|
|
||||||
def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
|
|
||||||
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
|
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
|
||||||
if pad != 0:
|
if pad != 0:
|
||||||
fp.write(bytes([0] * pad))
|
fp.write(bytes([0] * pad))
|
||||||
|
|
||||||
def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
|
def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
|
||||||
|
if self.state is not WriterState.TI_DATA:
|
||||||
|
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
|
||||||
|
|
||||||
if self.endianess==GGUFEndian.BIG:
|
if self.endianess==GGUFEndian.BIG:
|
||||||
tensor.byteswap(inplace=True)
|
tensor.byteswap(inplace=True)
|
||||||
self.write_padding(self.fout, self.fout.tell())
|
self.write_padding(self.fout, self.fout.tell())
|
||||||
@ -854,10 +875,13 @@ class GGUFWriter:
|
|||||||
self.write_padding(self.fout, self.fout.tell())
|
self.write_padding(self.fout, self.fout.tell())
|
||||||
|
|
||||||
if self.temp_file is None:
|
if self.temp_file is None:
|
||||||
for (currtensor, currpad) in self.tensors:
|
while True:
|
||||||
currtensor.tofile(self.fout)
|
try:
|
||||||
if currpad != 0:
|
tensor = self.tensors.pop(0)
|
||||||
self.fout.write(bytes([0] * currpad))
|
except IndexError:
|
||||||
|
break
|
||||||
|
tensor.tofile(self.fout)
|
||||||
|
self.write_padding(self.fout, tensor.nbytes)
|
||||||
return
|
return
|
||||||
|
|
||||||
self.temp_file.seek(0)
|
self.temp_file.seek(0)
|
||||||
@ -1002,11 +1026,8 @@ class GGUFWriter:
|
|||||||
|
|
||||||
|
|
||||||
class SpecialVocab:
|
class SpecialVocab:
|
||||||
load_merges: bool = False
|
merges: list[str]
|
||||||
merges: list[str] = []
|
special_token_ids: dict[str, int]
|
||||||
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
|
|
||||||
special_token_ids: dict[str, int] = {}
|
|
||||||
n_vocab: int | None = None
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, path: str | os.PathLike[str], load_merges: bool = False,
|
self, path: str | os.PathLike[str], load_merges: bool = False,
|
||||||
@ -1016,8 +1037,11 @@ class SpecialVocab:
|
|||||||
self.special_token_ids = {}
|
self.special_token_ids = {}
|
||||||
self.n_vocab = n_vocab
|
self.n_vocab = n_vocab
|
||||||
self.load_merges = load_merges
|
self.load_merges = load_merges
|
||||||
|
self.merges = []
|
||||||
if special_token_types is not None:
|
if special_token_types is not None:
|
||||||
self.special_token_types = special_token_types
|
self.special_token_types = special_token_types
|
||||||
|
else:
|
||||||
|
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
|
||||||
self._load(Path(path))
|
self._load(Path(path))
|
||||||
|
|
||||||
def _load(self, path: Path) -> None:
|
def _load(self, path: Path) -> None:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "gguf"
|
name = "gguf"
|
||||||
version = "0.4.5"
|
version = "0.4.6"
|
||||||
description = "Write ML models in GGUF for GGML"
|
description = "Write ML models in GGUF for GGML"
|
||||||
authors = ["GGML <ggml@ggml.ai>"]
|
authors = ["GGML <ggml@ggml.ai>"]
|
||||||
packages = [
|
packages = [
|
||||||
|
Loading…
Reference in New Issue
Block a user