diff --git a/Makefile b/Makefile index f5922c95d..304b3035d 100644 --- a/Makefile +++ b/Makefile @@ -393,7 +393,7 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput -gguf: examples/gguf/gguf.cpp build-info.h ggml.o $(OBJS) +gguf: examples/gguf/gguf.cpp build-info.h ggml.o gguf-llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) gguf-llama-simple: examples/gguf/gguf-llama-simple.cpp build-info.h ggml.o gguf-llama.o common.o $(OBJS) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index a1b8edc71..6f454a204 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -1,5 +1,6 @@ #include "ggml.h" #include "gguf-util.h" +#include "gguf-llama.h" #include #include @@ -7,14 +8,14 @@ #include #include #include - +/* template static std::string to_string(const T & val) { std::stringstream ss; ss << val; return ss.str(); } - +*/ void gguf_ex_write_str(std::ofstream & fout, const std::string & val) { const int32_t n = val.size(); fout.write((const char *) &n, sizeof(n)); @@ -414,7 +415,7 @@ int main(int argc, char ** argv) { const std::string fname(argv[1]); const std::string mode (argv[2]); - GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w"); + GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q"); if (mode == "w") { GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); @@ -422,6 +423,9 @@ int main(int argc, char ** argv) { GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); + } else if (mode == "q") { + llama_model_quantize_params params = 
llama_model_quantize_default_params(); + llama_model_quantize(fname.c_str(), "quant.gguf", &params); } return 0; diff --git a/gguf-llama.cpp b/gguf-llama.cpp index defe26fe0..f1755fef5 100644 --- a/gguf-llama.cpp +++ b/gguf-llama.cpp @@ -738,15 +738,19 @@ struct gguf_file_saver { info_offset = file.tell(); size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset; file.write_zeros(count); + printf("info_offset = %zu\n", info_offset); + file.seek(info_offset, SEEK_SET); + GGML_ASSERT(info_offset == file.tell()); } size_t write_tensor_info(llama_load_tensor & tensor) { size_t total_written = 0; - file.seek(0, info_offset); + file.seek(info_offset, SEEK_SET); + GGML_ASSERT(info_offset == file.tell()); total_written += file.write_str(tensor.name); int32_t n_dims = tensor.ne.size(); - file.write_i32(n_dims); + total_written += file.write_i32(n_dims); for (int32_t i = 0; i < n_dims; ++i) { total_written += file.write_i32(i); }