diff --git a/gguf-llama.cpp b/gguf-llama.cpp
index 0c4095714..9c0b5651a 100644
--- a/gguf-llama.cpp
+++ b/gguf-llama.cpp
@@ -626,20 +626,32 @@ struct gguf_file_saver {
         : file(fname, "wb"), any_file_loader(any_file_loader) {
         fprintf(stderr, "llama.cpp: saving model to %s\n", fname);
         write_magic();
+        write_version();
         write_hparams(new_ftype);
         write_vocab();
     }
+
     void write_magic() {
+        const int32_t magic = GGUF_MAGIC;
+        file.write_i32(magic);
         }
+
+        void write_version() {
+            // the GGUF format version is written as a single 32-bit integer
+            file.write_i32(GGUF_VERSION);
+        }
+    // Stub: makes no write calls yet; hparams and new_ftype are only passed to GGML_UNUSED (presumably to silence unused warnings).
     void write_hparams(enum llama_ftype new_ftype) {
         const llama_hparams & hparams = any_file_loader->hparams;
         GGML_UNUSED(hparams);
         GGML_UNUSED(new_ftype);
     }
+    // Stub: makes no write calls yet; n_vocab is read from the loader's hparams and only passed to GGML_UNUSED.
     void write_vocab() {
         uint32_t n_vocab = any_file_loader->hparams.n_vocab;
         GGML_UNUSED(n_vocab);
     }
+    
     void write_tensor(llama_load_tensor & tensor, enum ggml_type new_type, const void * new_data, size_t new_size) {
         switch (new_type) {
             case GGML_TYPE_F32:
diff --git a/gguf-util.h b/gguf-util.h
index 74d6e61f7..94ea9006a 100644
--- a/gguf-util.h
+++ b/gguf-util.h
@@ -15,6 +15,7 @@
 #include <climits>
 
 #include <string>
+#include <sstream>
 #include <vector>
 #include <stdexcept>
 
@@ -61,6 +62,14 @@ static std::string format(const char * fmt, ...) {
     return std::string(buf.data(), size);
 }
 
+// Renders any value that supports operator<< on an output stream as a std::string.
+template<typename T>
+static std::string to_string(const T & val) {
+    std::ostringstream out;
+    out << val;
+    return out.str();
+}
+
 // TODO: can we merge this one and gguf_context?
 struct gguf_file {
     // use FILE * so we don't have to re-open the file to mmap
@@ -95,6 +104,42 @@ struct gguf_file {
 #endif
         GGML_ASSERT(ret == 0); // same
     }
+
+    // Writes a string as a 32-bit length followed by that many bytes (no terminating NUL).
+    void write_str(const std::string & val) {
+        const int32_t len = val.size();
+        fwrite(&len, sizeof(len), 1, fp);
+        fwrite(val.data(), len, 1, fp);
+    }
+    // Writes the 4 bytes of val exactly as they are laid out in memory.
+    void write_i32(int32_t val) {
+        fwrite(&val, sizeof(int32_t), 1, fp);
+    }
+    // Writes val as exactly 8 bytes; a size_t parameter would emit only 4 bytes on 32-bit targets.
+    void write_u64(uint64_t val) {
+        fwrite(&val, sizeof(val), 1, fp);
+    }
+    // Writes a key/value record: key via write_str, then the type tag, then sizeof(T) raw bytes of val.
+    template<typename T>
+    void write_val(const std::string & key, enum gguf_type type, const T & val) {
+        write_str(key);
+        fwrite(&type, sizeof(type), 1, fp);
+        fwrite(&val, sizeof(T), 1, fp);
+    }
+    // Writes an array record: key, GGUF_TYPE_ARRAY tag, element type tag, 32-bit count, then the elements.
+    template<typename T>
+    void write_arr(const std::string & key, enum gguf_type type, const std::vector<T> & val) {
+        write_str(key);
+        // the outer tag marks the value as an array; the element type tag follows it
+        const enum gguf_type array_tag = GGUF_TYPE_ARRAY;
+        fwrite(&array_tag, sizeof(array_tag), 1, fp);
+        fwrite(&type, sizeof(type), 1, fp);
+
+        // element count, then all elements as one contiguous run of raw bytes
+        const int32_t count = val.size();
+        fwrite(&count, sizeof(count), 1, fp);
+        fwrite(val.data(), sizeof(T), count, fp);
+    }
 };
 
 #if defined(_WIN32)