From 202eab04d3f5a06304b9fd43b4b6b079d3f76dfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Yusuf=20Sar=C4=B1g=C3=B6z?= Date: Sat, 12 Aug 2023 16:39:05 +0300 Subject: [PATCH] gguf : quantization is working --- examples/gguf/gguf.cpp | 2 +- gguf-llama.cpp | 7 ++----- gguf-util.h | 4 ++++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index 08f2b6322..6f454a204 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -421,7 +421,7 @@ int main(int argc, char ** argv) { GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); } else if (mode == "r") { GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); - //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); + GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); } else if (mode == "q") { llama_model_quantize_params params = llama_model_quantize_default_params(); diff --git a/gguf-llama.cpp b/gguf-llama.cpp index ea721a0c7..700d6009b 100644 --- a/gguf-llama.cpp +++ b/gguf-llama.cpp @@ -752,7 +752,6 @@ struct gguf_file_saver { file.seek(info_offset, SEEK_SET); GGML_ASSERT(info_offset == file.tell()); total_written += file.write_str(tensor.name); -printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str()); int32_t n_dims = tensor.ne.size(); total_written += file.write_i32(n_dims); @@ -765,8 +764,7 @@ printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str()); info_offset += total_written; file.seek(0, SEEK_END); - printf("total_written = %zu\n", total_written); - + return total_written; } @@ -936,8 +934,7 @@ struct llama_model_loader { } else { gguf_file & file = file_loader->file; file.seek(lt.file_off, SEEK_SET); - // TODO - //file.read_raw(lt.data, lt.size); + file.read_raw(lt.data, lt.size); } if (0) { diff --git a/gguf-util.h b/gguf-util.h index ed7d53f69..6395cf304 100644 --- a/gguf-util.h 
+++ b/gguf-util.h
@@ -131,6 +131,17 @@ struct gguf_file {
         fwrite(data, size, 1, fp);
     }
 
+    // Read exactly `size` bytes from the current position of `fp` into `data`.
+    // A short read (EOF or I/O error) aborts via GGML_ASSERT instead of leaving
+    // `data` partially filled and silently continuing.
+    void read_raw(void * data, size_t size) {
+        if (size == 0) {
+            return; // nothing to read; fread() would report 0 items here
+        }
+        const size_t n_items = fread(data, size, 1, fp);
+        GGML_ASSERT(n_items == 1 && "failed to read from gguf file");
+    }
+
     template<typename T>
     void write_val(const std::string & key, enum gguf_type type, const T & val) {
         write_str(key);