mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-06 02:48:57 +01:00
gguf : start implementing quantization (WIP)
This commit is contained in:
parent
4fa017a1f9
commit
0e1a3c7e7d
@ -615,6 +615,8 @@ struct gguf_file_saver {
|
||||
gguf_file file;
|
||||
gguf_file_loader * fl;
|
||||
size_t info_offset;
|
||||
size_t tensor_offset = 0;
|
||||
|
||||
gguf_file_saver(const char * fname, gguf_file_loader * fl, enum llama_ftype new_ftype)
|
||||
: file(fname, "wb"), fl(fl) {
|
||||
fprintf(stderr, "llama.cpp: saving model to %s\n", fname);
|
||||
@ -622,8 +624,6 @@ struct gguf_file_saver {
|
||||
write_hparams(new_ftype);
|
||||
}
|
||||
|
||||
// TODO: probably it's better to move these to gguf_file
|
||||
|
||||
void write_header() {
|
||||
const int32_t magic = GGUF_MAGIC;
|
||||
file.write_i32(magic);
|
||||
@ -740,12 +740,26 @@ struct gguf_file_saver {
|
||||
file.write_zeros(count);
|
||||
}
|
||||
|
||||
size_t write_tensor_info(llama_load_tensor & tensor) {
|
||||
size_t total_written = 0;
|
||||
file.seek(0, info_offset);
|
||||
total_written += file.write_str(tensor.name);
|
||||
|
||||
int32_t n_dims = tensor.ne.size();
|
||||
file.write_i32(n_dims);
|
||||
for (int32_t i = 0; i < n_dims; ++i) {
|
||||
total_written += file.write_i32(i);
|
||||
}
|
||||
|
||||
total_written += file.write_u64(tensor_offset);
|
||||
info_offset += total_written;
|
||||
|
||||
file.seek(0, SEEK_END);
|
||||
|
||||
return total_written;
|
||||
}
|
||||
|
||||
void write_tensor(llama_load_tensor & tensor, enum ggml_type new_type, const void * new_data, size_t new_size) {
|
||||
GGML_UNUSED(tensor);
|
||||
GGML_UNUSED(new_data);
|
||||
GGML_UNUSED(new_size);
|
||||
|
||||
switch (new_type) {
|
||||
case GGML_TYPE_F32:
|
||||
case GGML_TYPE_F16:
|
||||
@ -763,6 +777,13 @@ struct gguf_file_saver {
|
||||
default: GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
write_tensor_info(tensor);
|
||||
// file.write_raw(new_data);
|
||||
GGML_UNUSED(new_data);
|
||||
size_t padded_size = GGML_PAD(new_size, GGUF_DEFAULT_ALIGNMENT); // TODO: handle custom alignment
|
||||
size_t pad = padded_size - new_size;
|
||||
file.write_zeros(pad);
|
||||
tensor_offset += padded_size;
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user