diff --git a/ggml.c b/ggml.c
index 77a3d89f7..f4c34f5d1 100644
--- a/ggml.c
+++ b/ggml.c
@@ -813,6 +813,48 @@ typedef struct {
 } block_q8_1;
 static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
 
+// Convert Q4 blocks from the GGJT v1 nibble layout (two consecutive values
+// packed per byte) to the current layout (low nibbles hold the first half of
+// the block, high nibbles the second half), in place.
+void quantize_upgrade(enum ggml_type type, void * data, size_t size) {
+    if (type == GGML_TYPE_Q4_0) {
+        const int qk = ggml_blck_size(type);
+        const size_t nb = size / sizeof(block_q4_0);
+        block_q4_0 * blk = (block_q4_0 *) data;
+        block_q4_0 new_blk;
+
+        for (size_t i = 0; i < nb; i++) {
+            for (int j = 0; j < qk/4; j++) {
+                // old: byte j packs x[2j] (low nibble) and x[2j+1] (high nibble)
+                // new: byte j packs x[j] (low nibble) and x[j + qk/2] (high nibble)
+                const uint8_t d1 = blk[i].qs[j];        // x[2j],        x[2j+1]
+                const uint8_t d2 = blk[i].qs[qk/4 + j]; // x[qk/2 + 2j], x[qk/2 + 2j+1]
+
+                new_blk.qs[j*2 + 0] = (d1 & 0x0f) | ((d2 & 0x0f) << 4);
+                new_blk.qs[j*2 + 1] = (d1 >> 4)   | (d2 & 0xf0);
+            }
+            memcpy(blk[i].qs, new_blk.qs, sizeof(new_blk.qs));
+        }
+    } else if (type == GGML_TYPE_Q4_1) {
+        // same nibble shuffle as Q4_0; only the block struct differs
+        const int qk = ggml_blck_size(type);
+        const size_t nb = size / sizeof(block_q4_1);
+        block_q4_1 * blk = (block_q4_1 *) data;
+        block_q4_1 new_blk;
+
+        for (size_t i = 0; i < nb; i++) {
+            for (int j = 0; j < qk/4; j++) {
+                const uint8_t d1 = blk[i].qs[j];
+                const uint8_t d2 = blk[i].qs[qk/4 + j];
+
+                new_blk.qs[j*2 + 0] = (d1 & 0x0f) | ((d2 & 0x0f) << 4);
+                new_blk.qs[j*2 + 1] = (d1 >> 4)   | (d2 & 0xf0);
+            }
+            memcpy(blk[i].qs, new_blk.qs, sizeof(new_blk.qs));
+        }
+    }
+}
+
 // reference implementation for deterministic creation of model files
 static void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
     static const int qk = QK4_0;
diff --git a/ggml.h b/ggml.h
index 51a616c50..787f927cd 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1086,6 +1086,7 @@ extern "C" {
     GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
 
+    GGML_API void quantize_upgrade(enum ggml_type type, void * data, size_t size);
     //
     // system info
     //
 
diff --git a/llama.cpp b/llama.cpp
index 4cbc8d6b6..fb231d56d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2074,7 +2074,19 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         size_t new_size;
         llama_buffer work;
 
-        if (!quantize) {
+        if (model_loader->file_loaders.at(0)->file_version == LLAMA_FILE_VERSION_GGJT_V1 && quantize) {
+            // same quantization type requested: upgrade the block layout in place
+            if ((tensor.type == GGML_TYPE_Q4_0 && quantized_type == GGML_TYPE_Q4_0) ||
+                (tensor.type == GGML_TYPE_Q4_1 && quantized_type == GGML_TYPE_Q4_1)) {
+                new_type = tensor.type;
+                new_data = tensor.data;
+                new_size = tensor.size;
+                quantize_upgrade(new_type, new_data, new_size);
+                printf("upgrade - size = %8.3f MB\n", tensor.size/1024.0/1024.0);
+            } else {
+                throw format("type %s unsupported for quantization format upgrade", ggml_type_name(tensor.type));
+            }
+        } else if (!quantize) {
             new_type = tensor.type;
             new_data = tensor.data;
             new_size = tensor.size;
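
A quick way to sanity-check the re-layout is to feed `quantize_upgrade` a single block with known nibble values and assert the interleaved result. The sketch below is not part of the patch; it assumes it is compiled in the same translation unit as ggml.c, since `block_q4_0` and `QK4_0` are internal to ggml.c and not exposed through ggml.h. The value pattern `x[k] = k % 13` is arbitrary, chosen so the two halves of the block hold different values.

```c
#include <assert.h>
#include <stdint.h>

static void test_quantize_upgrade_q4_0(void) {
    block_q4_0 blk;
    blk.d = 1.0f;

    // GGJT v1 layout: byte j packs x[2j] (low nibble) and x[2j+1] (high nibble)
    for (int j = 0; j < QK4_0/2; j++) {
        const uint8_t lo = (uint8_t)((2*j    ) % 13); // x[2j]
        const uint8_t hi = (uint8_t)((2*j + 1) % 13); // x[2j+1]
        blk.qs[j] = (uint8_t)(lo | (hi << 4));
    }

    quantize_upgrade(GGML_TYPE_Q4_0, &blk, sizeof(blk));

    // new layout: byte j packs x[j] (low nibble) and x[j + QK4_0/2] (high nibble)
    for (int j = 0; j < QK4_0/2; j++) {
        assert((blk.qs[j] & 0x0f) ==  j             % 13);
        assert((blk.qs[j] >> 4)   == (j + QK4_0/2)  % 13);
    }
}
```

The scale `d` is untouched by the upgrade, so only the `qs` bytes need checking; running the same pattern through the Q4_1 branch with a `block_q4_1` works identically.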