Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-13 05:42:22 +01:00)

fix codestyle

commit 97d22be58c
parent 3a0f8b0697
convert-hf-to-gguf.py

@@ -1390,9 +1390,11 @@ class LlamaModel(Model):
             if len(experts) > 0:
                 raise ValueError(f"Unprocessed experts: {experts}")


 @Model.register("BitnetForCausalLM")
 class BitnetModel(Model):
     model_arch = gguf.MODEL_ARCH.BITNET

     def set_vocab(self):
         self._set_vocab_sentencepiece()

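A note on the pattern above: convert-hf-to-gguf.py wires new architectures in through the @Model.register decorator, keyed by the HF architectures string from config.json, plus a model_arch tag. Below is a minimal, self-contained sketch of that registry pattern, not the real implementation (the actual Model base class carries writer state, hparams loading, and much more):

from typing import Callable, Type


class Model:
    # maps the architecture name from config.json to the converter subclass
    _model_classes: dict[str, Type["Model"]] = {}

    @classmethod
    def register(cls, *names: str) -> Callable[[Type["Model"]], Type["Model"]]:
        def wrapper(model_cls: Type["Model"]) -> Type["Model"]:
            for name in names:
                cls._model_classes[name] = model_cls
            return model_cls
        return wrapper

    @classmethod
    def from_model_architecture(cls, arch: str) -> Type["Model"]:
        try:
            return cls._model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported!") from None


@Model.register("BitnetForCausalLM")
class BitnetModel(Model):
    pass


# looking up the class by the HF architecture string recovers the subclass
assert Model.from_model_architecture("BitnetForCausalLM") is BitnetModel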
@@ -1407,9 +1409,7 @@ class BitnetModel(Model):
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])

         self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])

         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
         self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
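The set_gguf_parameters hunk above copies fields from the HF config.json into GGUF key-value metadata, and pins RoPE scaling to LINEAR with factor 1.0. A hedged sketch of that mapping follows; the writer is stubbed out (the real code uses gguf.GGUFWriter from gguf-py) and the hparams values are illustrative, not read from a real checkpoint:

hparams = {  # illustrative values
    "num_attention_heads": 32,
    "num_key_value_heads": 8,
    "rms_norm_eps": 1e-5,
    "vocab_size": 32000,
    "rope_theta": 10000.0,
}


class StubWriter:
    # stands in for gguf.GGUFWriter: records each add_* call instead of
    # serializing a GGUF file
    def __getattr__(self, key):
        return lambda *args: print(f"{key}{args}")


w = StubWriter()
w.add_head_count(hparams["num_attention_heads"])
w.add_head_count_kv(hparams["num_key_value_heads"])
w.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
w.add_vocab_size(hparams["vocab_size"])
# as in the hunk: linear RoPE scaling with factor 1.0, i.e. no effective scaling
w.add_rope_scaling_type("linear")
w.add_rope_scaling_factor(1.0)
w.add_rope_freq_base(hparams["rope_theta"])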
@@ -1430,6 +1430,7 @@ class BitnetModel(Model):

         return [(self.map_tensor_name(name), data_torch)]


 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
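The return statement above is the default modify_tensors contract: each source tensor is renamed to its GGUF name and passed through unchanged. A toy sketch under that assumption; map_tensor_name here is a one-entry stand-in, while the real mapping lives in gguf-py's tensor-mapping tables:

def map_tensor_name(name: str) -> str:
    # one illustrative entry; the real tables cover every architecture
    table = {"model.embed_tokens.weight": "token_embd.weight"}
    return table.get(name, name)


def modify_tensors(name: str, data_torch):
    # default behaviour: rename only, pass the tensor data through unchanged
    return [(map_tensor_name(name), data_torch)]


print(modify_tensors("model.embed_tokens.weight", "tensor-placeholder"))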
ggml.c
@@ -12349,7 +12349,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     // attempt to reduce false-sharing (does not seem to make a difference)
     // 16 * 2, accounting for mmla kernels
     float tmp[32];
-    float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
+    const float * scale = (float * )((uint8_t*) (src0->data) + (ne00 * ne01 / 4));
     const float * act_scales = (const float*) ((const char *) wdata + (ne11 * ne10));

     for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {
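About the changed line: the offset ne00 * ne01 / 4 places the scale right after the packed weight block, consistent with 2-bit weights stored four to a byte (ne00 * ne01 elements take ne00 * ne01 / 4 bytes). A Python stand-in for that pointer arithmetic, with illustrative dimensions and scale value:

import struct

ne00, ne01 = 8, 4                   # illustrative tensor dimensions
packed_size = ne00 * ne01 // 4      # bytes used by the 2-bit packed weights
scale = 0.042                       # illustrative scale value

# layout assumed by the C code: packed weights first, float scale(s) after
buf = bytes(packed_size) + struct.pack("<f", scale)

# mirrors: scale = (const float *)((uint8_t *)src0->data + ne00 * ne01 / 4)
(recovered,) = struct.unpack_from("<f", buf, packed_size)
assert abs(recovered - scale) < 1e-6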
llama.cpp

@@ -15961,6 +15961,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
             new_type = params->output_tensor_type;
         }

         // If we've decided to quantize to the same type the tensor is already
         // in then there's nothing to do.
         quantize = tensor->type != new_type;
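The quantization hunk above encodes two decisions: a user-supplied output_tensor_type overrides the default target type for the output.weight tensor, and quantization is skipped when the tensor already has the target type. A hedged Python sketch of that control flow (GGML_TYPE_COUNT below is an illustrative sentinel, and the type ids are placeholders):

GGML_TYPE_COUNT = 30  # illustrative sentinel, not the real enum size


def decide(tensor_name, tensor_type, default_new_type, output_tensor_type=None):
    new_type = default_new_type
    # user-supplied override only applies to the output.weight tensor
    if (output_tensor_type is not None and output_tensor_type < GGML_TYPE_COUNT
            and tensor_name == "output.weight"):
        new_type = output_tensor_type
    # nothing to do if the tensor is already stored in the target type
    quantize = tensor_type != new_type
    return new_type, quantize


print(decide("output.weight", tensor_type=0, default_new_type=2, output_tensor_type=8))
print(decide("blk.0.attn_q.weight", tensor_type=2, default_new_type=2))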