diff --git a/ggml.c b/ggml.c
index 1223afced..4d6389a56 100644
--- a/ggml.c
+++ b/ggml.c
@@ -920,7 +920,6 @@ void quantize_upgrade(enum ggml_type type, void* data, size_t * size, bool shuff
     block_q8_0_old *blk = (block_q8_0_old *)data;
     block_q8_0 *new_blk = (block_q8_0 *)data;
     block_q8_0 new_blk_buf;
-    *size = nb * sizeof(block_q8_0);
 
     for (size_t i = 0; i < nb ; i++) {
diff --git a/llama.cpp b/llama.cpp
index 5f4714be5..c1892037b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2109,10 +2109,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     bool needShuffle = (model_loader->file_loaders.at(0)->file_version == LLAMA_FILE_VERSION_GGJT_V1);
 
     if (model_loader->file_loaders.at(0)->file_version < LLAMA_FILE_VERSION_GGJT_V3 && quantize) {
-        if ((quantized_type == tensor.type) &&
-            (tensor.type == GGML_TYPE_Q4_0 || tensor.type == GGML_TYPE_Q4_1
-             || tensor.type == GGML_TYPE_Q5_0 || tensor.type == GGML_TYPE_Q5_1
-             || tensor.type == GGML_TYPE_Q8_0)) {
+        if ((quantized_type == tensor.type) &&
+            (tensor.type == GGML_TYPE_Q4_0 || tensor.type == GGML_TYPE_Q4_1 || tensor.type == GGML_TYPE_Q5_0 || tensor.type == GGML_TYPE_Q5_1 || tensor.type == GGML_TYPE_Q8_0)) {
             // convet
             new_type = tensor.type;
             new_data = tensor.data;
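
For context on the first hunk: `quantize_upgrade` rewrites quantized blocks in place (the deleted `*size = nb * sizeof(block_q8_0);` line had been setting the output byte size before the conversion loop ran). A minimal sketch of that in-place upgrade pattern, assuming hypothetical `block_q8_0_old`/`block_q8_0` layouts and a stubbed fp16 conversion; the real definitions and `GGML_FP32_TO_FP16` live in ggml.c:

```c
#include <stddef.h>
#include <string.h>

/* Hypothetical block layouts, for illustration only.  Assumption:
 * the old Q8_0 block stored its scale as float, the new one as
 * fp16, so the new block is smaller than the old one.           */
typedef struct { float d;          signed char qs[32]; } block_q8_0_old;
typedef struct { unsigned short d; signed char qs[32]; } block_q8_0;

/* Stand-in for ggml's GGML_FP32_TO_FP16 conversion macro. */
static unsigned short fp32_to_fp16_stub(float f) { (void)f; return 0; }

/* In-place upgrade: the old and new block arrays alias the same
 * buffer.  Because the new blocks are smaller, converting forward
 * never overwrites an old block that hasn't been read yet, and the
 * stack temporary handles the overlap between blk[i] and new_blk[i]. */
static void upgrade_q8_0(void *data, size_t nb) {
    block_q8_0_old *blk     = (block_q8_0_old *)data;
    block_q8_0     *new_blk = (block_q8_0     *)data;
    block_q8_0      new_blk_buf;

    for (size_t i = 0; i < nb; i++) {
        new_blk_buf.d = fp32_to_fp16_stub(blk[i].d);
        memcpy(new_blk_buf.qs, blk[i].qs, sizeof(new_blk_buf.qs));
        memcpy(&new_blk[i], &new_blk_buf, sizeof(new_blk_buf));
    }
}
```

The stack temporary mirrors `new_blk_buf` in the patch: since `blk[i]` and `new_blk[i]` occupy overlapping bytes, converting directly into `new_blk[i]` could read input that was already overwritten.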