# Patch summary: let a GGUF file carry its own data alignment under the
# "general.alignment" metadata key.
#
#   constants.py - adds KEY_GENERAL_ALIGNMENT = "general.alignment".
#   ggml.c       - in the gguf_init_from_file hunk, replaces the TODO with a
#                  gguf_find_key(ctx, "general.alignment") lookup; when the
#                  returned index is not -1, ctx->alignment is overwritten with
#                  gguf_get_u32(ctx, alignment_idx). Otherwise it keeps the
#                  GGUF_DEFAULT_ALIGNMENT assigned just above.
#   gguf.py      - adds GGUFWriter.write_custom_alignment(alignment), which
#                  writes the value as a uint32 under KEY_GENERAL_ALIGNMENT.
#                  The example script now calls write_custom_alignment(64) and
#                  changes write_header(2, 3) to write_header(2, 4).
#
# NOTE(review): the value read in ggml.c is used as-is; no check on it is
#   visible in this hunk. The following context says the data section must be
#   aligned, so a zero or otherwise invalid value from a file presumably breaks
#   the padding computation - confirm and consider validating.
# NOTE(review): write_custom_alignment only records the key as far as this
#   patch shows. Confirm the writer's own tensor padding uses the same value,
#   otherwise the file would advertise an alignment it was not written with.
# NOTE(review): the example makes four key/value writes before the tensors;
#   presumably the second write_header argument is the key/value count - verify.
# NOTE(review): the patch below has lost its line breaks (everything is on one
#   line), so it cannot be applied in this form; it is kept verbatim.
diff --git a/constants.py b/constants.py index 14f11123b..d9e110b73 100644 --- a/constants.py +++ b/constants.py @@ -5,6 +5,7 @@ GGUF_DEFAULT_ALIGNMENT = 32 # general KEY_GENERAL_ARCHITECTURE = "general.architecture" KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version" +KEY_GENERAL_ALIGNMENT = "general.alignment" KEY_GENERAL_NAME = "general.name" KEY_GENERAL_AUTHOR = "general.author" KEY_GENERAL_URL = "general.url" diff --git a/ggml.c b/ggml.c index 1b2f071e7..157199118 100644 --- a/ggml.c +++ b/ggml.c @@ -18543,7 +18543,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ctx->alignment = GGUF_DEFAULT_ALIGNMENT; - // TODO: determine new alignment from kv if available + int alignment_idx = gguf_find_key(ctx, "general.alignment"); + if (alignment_idx != -1) { + ctx->alignment = gguf_get_u32(ctx, alignment_idx); + } // we require the data section to be aligned, so take into account any padding { diff --git a/gguf.py b/gguf.py index 99ea81702..8e2c771ba 100644 --- a/gguf.py +++ b/gguf.py @@ -220,6 +220,9 @@ class GGUFWriter: self.write_uint32( constants.KEY_GENERAL_QUANTIZATION_VERSION, quantization_version) + def write_custom_alignment(self, alignment: int): + self.write_uint32(constants.KEY_GENERAL_ALIGNMENT, alignment) + def write_context_length(self, llm: str, length: int): self.write_uint32( constants.KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length) @@ -292,11 +295,12 @@ class GGUFWriter: if __name__ == "__main__": # Example usage with a file gguf_writer = GGUFWriter.open("example.gguf") - gguf_writer.write_header(2, 3) + gguf_writer.write_header(2, 4) gguf_writer.write_architecture("llama") gguf_writer.write_uint32("answer", 42) # Write a 32-bit integer gguf_writer.write_float32("answer_in_float", 42.0) # Write a 32-bit float + gguf_writer.write_custom_alignment(64) tensor1 = np.ones((32,), dtype=np.float32) * 100.0 tensor2 = np.ones((32,), dtype=np.float32) * 101.0 gguf_writer.write_tensor_info("tensor0", tensor1)