fix codestyle

Eddie-Wang1120 2024-06-09 21:22:50 +08:00
parent 3a0f8b0697
commit 97d22be58c
3 changed files with 6 additions and 4 deletions

convert-hf-to-gguf.py

@@ -1390,9 +1390,11 @@ class LlamaModel(Model):
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")

 @Model.register("BitnetForCausalLM")
 class BitnetModel(Model):
     model_arch = gguf.MODEL_ARCH.BITNET

     def set_vocab(self):
         self._set_vocab_sentencepiece()
@@ -1407,9 +1409,7 @@ class BitnetModel(Model):
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
         self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
@@ -1430,6 +1430,7 @@ class BitnetModel(Model):
         return [(self.map_tensor_name(name), data_torch)]

 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
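
Read together, these hunks touch the new BitnetModel converter class. The sketch below reassembles it from only the lines visible in this diff; the method names set_gguf_parameters and modify_tensors and the modify_tensors signature are assumptions about the surrounding file, not part of this commit.

# Sketch only: reassembled from the hunks above.
# set_gguf_parameters / modify_tensors names and signature are assumed.
@Model.register("BitnetForCausalLM")
class BitnetModel(Model):
    model_arch = gguf.MODEL_ARCH.BITNET

    def set_vocab(self):
        self._set_vocab_sentencepiece()

    def set_gguf_parameters(self):  # assumed method name
        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
        # RoPE metadata exactly as written in the hunk above: linear scaling, factor 1.0
        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
        self.gguf_writer.add_rope_scaling_factor(1.0)
        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])

    def modify_tensors(self, data_torch, name, bid):  # assumed name and signature
        return [(self.map_tensor_name(name), data_torch)]

The @Model.register("BitnetForCausalLM") decorator is what lets the conversion script dispatch to this class when a checkpoint reports BitnetForCausalLM as its architecture.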

ggml.c

@@ -12349,7 +12349,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     // attempt to reduce false-sharing (does not seem to make a difference)
     // 16 * 2, accounting for mmla kernels
     float tmp[32];
-    float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
+    const float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
     const float * act_scales = (const float*) ((const char *) wdata + (ne11 * ne10));

     for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {

llama.cpp

@@ -15961,6 +15961,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
             new_type = params->output_tensor_type;
         }
         // If we've decided to quantize to the same type the tensor is already
         // in then there's nothing to do.
         quantize = tensor->type != new_type;
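
For what the comment above is guarding: once the output.weight override (and the earlier type-selection rules) have settled on new_type, the tensor is only requantized when it is not already stored in that type. A minimal illustration of that single decision, written in Python with hypothetical names rather than the C of llama_model_quantize_internal:

# Illustrative sketch of `quantize = tensor->type != new_type;` only.
def needs_requantization(current_type: str, target_type: str) -> bool:
    # Nothing to do if the tensor already has the requested type.
    return current_type != target_type

print(needs_requantization("F16", "Q4_K"))   # True: data must be converted
print(needs_requantization("Q4_K", "Q4_K"))  # False: copy the tensor through unchanged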