mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 11:23:56 +01:00
gguf : support custom alignment value
This commit is contained in:
parent
6b3a7b9f4f
commit
7aa0a0e7f7
@ -5,6 +5,7 @@ GGUF_DEFAULT_ALIGNMENT = 32
|
|||||||
# general
|
# general
|
||||||
KEY_GENERAL_ARCHITECTURE = "general.architecture"
|
KEY_GENERAL_ARCHITECTURE = "general.architecture"
|
||||||
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
|
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
|
||||||
|
KEY_GENERAL_ALIGNMENT = "general.alignment"
|
||||||
KEY_GENERAL_NAME = "general.name"
|
KEY_GENERAL_NAME = "general.name"
|
||||||
KEY_GENERAL_AUTHOR = "general.author"
|
KEY_GENERAL_AUTHOR = "general.author"
|
||||||
KEY_GENERAL_URL = "general.url"
|
KEY_GENERAL_URL = "general.url"
|
||||||
|
5
ggml.c
5
ggml.c
@ -18543,7 +18543,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
|
|
||||||
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
|
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
|
||||||
|
|
||||||
// TODO: determine new alignment from kv if available
|
int alignment_idx = gguf_find_key(ctx, "general.alignment");
|
||||||
|
if (alignment_idx != -1) {
|
||||||
|
ctx->alignment = gguf_get_u32(ctx, alignment_idx);
|
||||||
|
}
|
||||||
|
|
||||||
// we require the data section to be aligned, so take into account any padding
|
// we require the data section to be aligned, so take into account any padding
|
||||||
{
|
{
|
||||||
|
6
gguf.py
6
gguf.py
@ -220,6 +220,9 @@ class GGUFWriter:
|
|||||||
self.write_uint32(
|
self.write_uint32(
|
||||||
constants.KEY_GENERAL_QUANTIZATION_VERSION, quantization_version)
|
constants.KEY_GENERAL_QUANTIZATION_VERSION, quantization_version)
|
||||||
|
|
||||||
|
def write_custom_alignment(self, alignment: int):
|
||||||
|
self.write_uint32(constants.KEY_GENERAL_ALIGNMENT, alignment)
|
||||||
|
|
||||||
def write_context_length(self, llm: str, length: int):
|
def write_context_length(self, llm: str, length: int):
|
||||||
self.write_uint32(
|
self.write_uint32(
|
||||||
constants.KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length)
|
constants.KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length)
|
||||||
@ -292,11 +295,12 @@ class GGUFWriter:
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Example usage with a file
|
# Example usage with a file
|
||||||
gguf_writer = GGUFWriter.open("example.gguf")
|
gguf_writer = GGUFWriter.open("example.gguf")
|
||||||
gguf_writer.write_header(2, 3)
|
gguf_writer.write_header(2, 4)
|
||||||
|
|
||||||
gguf_writer.write_architecture("llama")
|
gguf_writer.write_architecture("llama")
|
||||||
gguf_writer.write_uint32("answer", 42) # Write a 32-bit integer
|
gguf_writer.write_uint32("answer", 42) # Write a 32-bit integer
|
||||||
gguf_writer.write_float32("answer_in_float", 42.0) # Write a 32-bit float
|
gguf_writer.write_float32("answer_in_float", 42.0) # Write a 32-bit float
|
||||||
|
gguf_writer.write_custom_alignment(64)
|
||||||
tensor1 = np.ones((32,), dtype=np.float32) * 100.0
|
tensor1 = np.ones((32,), dtype=np.float32) * 100.0
|
||||||
tensor2 = np.ones((32,), dtype=np.float32) * 101.0
|
tensor2 = np.ones((32,), dtype=np.float32) * 101.0
|
||||||
gguf_writer.write_tensor_info("tensor0", tensor1)
|
gguf_writer.write_tensor_info("tensor0", tensor1)
|
||||||
|
Loading…
Reference in New Issue
Block a user