diff --git a/Makefile b/Makefile index f5922c95d..304b3035d 100644 --- a/Makefile +++ b/Makefile @@ -393,7 +393,7 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput -gguf: examples/gguf/gguf.cpp build-info.h ggml.o $(OBJS) +gguf: examples/gguf/gguf.cpp build-info.h ggml.o gguf-llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) gguf-llama-simple: examples/gguf/gguf-llama-simple.cpp build-info.h ggml.o gguf-llama.o common.o $(OBJS) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index a1b8edc71..6f454a204 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -1,5 +1,6 @@ #include "ggml.h" #include "gguf-util.h" +#include "gguf-llama.h" #include #include @@ -7,14 +8,14 @@ #include #include #include - +/* template static std::string to_string(const T & val) { std::stringstream ss; ss << val; return ss.str(); } - +*/ void gguf_ex_write_str(std::ofstream & fout, const std::string & val) { const int32_t n = val.size(); fout.write((const char *) &n, sizeof(n)); @@ -414,7 +415,7 @@ int main(int argc, char ** argv) { const std::string fname(argv[1]); const std::string mode (argv[2]); - GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w"); + GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q"); if (mode == "w") { GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); @@ -422,6 +423,9 @@ int main(int argc, char ** argv) { GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); + } else if (mode == "q") { + llama_model_quantize_params params = 
llama_model_quantize_default_params(); + llama_model_quantize(fname.c_str(), "quant.gguf", &params); } return 0; diff --git a/gguf-llama.cpp b/gguf-llama.cpp index defe26fe0..f1755fef5 100644 --- a/gguf-llama.cpp +++ b/gguf-llama.cpp @@ -738,15 +738,19 @@ struct gguf_file_saver { info_offset = file.tell(); size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset; file.write_zeros(count); + printf("info_offset = %zu\n", info_offset); + file.seek(info_offset, SEEK_SET); + GGML_ASSERT(info_offset == file.tell()); } size_t write_tensor_info(llama_load_tensor & tensor) { size_t total_written = 0; - file.seek(0, info_offset); + file.seek(info_offset, SEEK_SET); + GGML_ASSERT(info_offset == file.tell()); total_written += file.write_str(tensor.name); int32_t n_dims = tensor.ne.size(); - file.write_i32(n_dims); + total_written += file.write_i32(n_dims); for (int32_t i = 0; i < n_dims; ++i) { total_written += file.write_i32(i); }