mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-13 05:42:22 +01:00)

commit 97d22be58c ("fix codestyle"), parent 3a0f8b0697
convert-hf-to-gguf.py

@@ -1390,9 +1390,11 @@ class LlamaModel(Model):
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")


 @Model.register("BitnetForCausalLM")
 class BitnetModel(Model):
     model_arch = gguf.MODEL_ARCH.BITNET

     def set_vocab(self):
         self._set_vocab_sentencepiece()
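The @Model.register("BitnetForCausalLM") decorator is what routes checkpoints whose config.json lists that architecture to the BitnetModel converter. A minimal sketch of the decorator-registry pattern this implies, assuming a class-level dict and a lookup helper (the names _model_classes and from_model_architecture are illustrative, not taken from this diff):

    class Model:
        _model_classes: dict[str, type] = {}  # architecture name -> converter class

        @classmethod
        def register(cls, *names):
            # Decorator: file the converter class under one or more
            # architecture names, then return it unchanged.
            def wrapper(modelcls):
                for name in names:
                    cls._model_classes[name] = modelcls
                return modelcls
            return wrapper

        @classmethod
        def from_model_architecture(cls, arch):
            try:
                return cls._model_classes[arch]
            except KeyError:
                raise NotImplementedError(f"Architecture {arch!r} not supported") from None

set_vocab then delegates to the shared SentencePiece path, so BitNet reuses the same tokenizer handling as the other Llama-style converters.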
@@ -1407,9 +1409,7 @@ class BitnetModel(Model):
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])

         self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])

         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
         self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
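All of these values are read straight from the checkpoint's config.json (loaded into self.hparams). A hypothetical excerpt with made-up values, just to show which keys the calls above consume:

    hparams = {
        "num_attention_heads": 32,     # -> add_head_count
        "num_key_value_heads": 32,     # -> add_head_count_kv
        "rms_norm_eps": 1e-5,          # -> add_layer_norm_rms_eps
        "vocab_size": 32000,           # -> add_vocab_size
        "rope_theta": 10000.0,         # -> add_rope_freq_base
    }

Note that the LINEAR rope scaling type combined with a factor of 1.0 is effectively a no-op; the pair just records the scaling explicitly in the GGUF metadata instead of leaving it unset.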
@@ -1430,6 +1430,7 @@ class BitnetModel(Model):
         return [(self.map_tensor_name(name), data_torch)]


 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
ggml.c
@@ -12349,7 +12349,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     // attempt to reduce false-sharing (does not seem to make a difference)
     // 16 * 2, accounting for mmla kernels
     float tmp[32];
-    float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
+    const float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
     const float * act_scales = (const float*) ((const char *) wdata + (ne11 * ne10));

     for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {
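The ne00 * ne01 / 4 offset is consistent with a 2-bit packed weight layout: four weights per byte, with the float scale data stored immediately after the packed block. That reading is an inference from the pointer arithmetic, not something the diff states; a small sketch of the implied layout math:

    def scale_offset_bytes(ne00: int, ne01: int) -> int:
        # ne00 * ne01 elements at 2 bits each -> 4 elements per byte,
        # so the scale floats start right after the packed weights.
        return (ne00 * ne01) // 4

    assert scale_offset_bytes(4096, 4096) == 4 * 1024 * 1024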
llama.cpp

@@ -15961,6 +15961,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
             new_type = params->output_tensor_type;
         }

         // If we've decided to quantize to the same type the tensor is already
         // in then there's nothing to do.
         quantize = tensor->type != new_type;
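Here GGML_TYPE_COUNT doubles as the "not set" sentinel: every valid requested type compares below it, so the branch only fires when the caller explicitly chose an output tensor type. A Python restatement of the decision, with None standing in for the sentinel:

    def choose_type(tensor_name, tensor_type, new_type, output_tensor_type=None):
        # output_tensor_type=None plays the role of GGML_TYPE_COUNT ("not set").
        if output_tensor_type is not None and tensor_name == "output.weight":
            new_type = output_tensor_type
        # Already stored in the target type -> nothing to quantize.
        quantize = tensor_type != new_type
        return new_type, quantize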