mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-13 05:42:22 +01:00)

commit 97d22be58c ("fix codestyle"), parent 3a0f8b0697
convert-hf-to-gguf.py

@@ -1390,9 +1390,11 @@ class LlamaModel(Model):
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")


 @Model.register("BitnetForCausalLM")
 class BitnetModel(Model):
     model_arch = gguf.MODEL_ARCH.BITNET

     def set_vocab(self):
         self._set_vocab_sentencepiece()
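The @Model.register("BitnetForCausalLM") decorator is what routes checkpoints whose config.json lists that architecture to the BitnetModel converter. A minimal sketch of the decorator-registry pattern this implies, assuming a class-level dict and a lookup helper (the names _model_classes and from_model_architecture are illustrative, not taken from this diff):

    class Model:
        _model_classes: dict[str, type] = {}  # architecture name -> converter class

        @classmethod
        def register(cls, *names):
            # Decorator: file the converter class under one or more
            # architecture names, then return it unchanged.
            def wrapper(modelcls):
                for name in names:
                    cls._model_classes[name] = modelcls
                return modelcls
            return wrapper

        @classmethod
        def from_model_architecture(cls, arch):
            try:
                return cls._model_classes[arch]
            except KeyError:
                raise NotImplementedError(f"Architecture {arch!r} not supported") from None

set_vocab then delegates to the shared SentencePiece path, so BitNet reuses the same tokenizer handling as the other Llama-style converters.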
@@ -1407,9 +1409,7 @@ class BitnetModel(Model):
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])

         self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])

         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
         self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
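All of these values are read straight from the checkpoint's config.json (loaded into self.hparams). A hypothetical excerpt with made-up values, just to show which keys the calls above consume:

    hparams = {
        "num_attention_heads": 32,     # -> add_head_count
        "num_key_value_heads": 32,     # -> add_head_count_kv
        "rms_norm_eps": 1e-5,          # -> add_layer_norm_rms_eps
        "vocab_size": 32000,           # -> add_vocab_size
        "rope_theta": 10000.0,         # -> add_rope_freq_base
    }

Note that the LINEAR rope scaling type combined with a factor of 1.0 is effectively a no-op; the pair just records the scaling explicitly in the GGUF metadata instead of leaving it unset.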
@@ -1430,6 +1430,7 @@ class BitnetModel(Model):
         return [(self.map_tensor_name(name), data_torch)]


 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
ggml.c
@@ -12349,7 +12349,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     // attempt to reduce false-sharing (does not seem to make a difference)
     // 16 * 2, accounting for mmla kernels
     float tmp[32];
-    float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
+    const float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
     const float * act_scales = (const float*) ((const char *) wdata + (ne11 * ne10));

     for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {
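The ne00 * ne01 / 4 offset is consistent with a 2-bit packed weight layout: four weights per byte, with the float scale data stored immediately after the packed block. That reading is an inference from the pointer arithmetic, not something the diff states; a small sketch of the implied layout math:

    def scale_offset_bytes(ne00: int, ne01: int) -> int:
        # ne00 * ne01 elements at 2 bits each -> 4 elements per byte,
        # so the scale floats start right after the packed weights.
        return (ne00 * ne01) // 4

    assert scale_offset_bytes(4096, 4096) == 4 * 1024 * 1024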
llama.cpp

@@ -15961,6 +15961,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
             new_type = params->output_tensor_type;
         }

         // If we've decided to quantize to the same type the tensor is already
         // in then there's nothing to do.
         quantize = tensor->type != new_type;
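Here GGML_TYPE_COUNT doubles as the "not set" sentinel: every valid requested type compares below it, so the branch only fires when the caller explicitly chose an output tensor type. A Python restatement of the decision, with None standing in for the sentinel:

    def choose_type(tensor_name, tensor_type, new_type, output_tensor_type=None):
        # output_tensor_type=None plays the role of GGML_TYPE_COUNT ("not set").
        if output_tensor_type is not None and tensor_name == "output.weight":
            new_type = output_tensor_type
        # Already stored in the target type -> nothing to quantize.
        quantize = tensor_type != new_type
        return new_type, quantize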