mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 05:17:21 +01:00
gguf : add rope_freq_base parameter for CodeLlama (#2769)
This commit is contained in:
parent
01f2224682
commit
0d3094f0c7
43
convert.py
43
convert.py
@ -104,6 +104,8 @@ class Params:
|
|||||||
n_head_kv: int
|
n_head_kv: int
|
||||||
f_norm_eps: float
|
f_norm_eps: float
|
||||||
|
|
||||||
|
f_rope_freq_base: Optional[float] = None
|
||||||
|
|
||||||
ftype: Optional[GGMLFileType] = None
|
ftype: Optional[GGMLFileType] = None
|
||||||
|
|
||||||
# path to the directory containing the model files
|
# path to the directory containing the model files
|
||||||
@ -194,15 +196,16 @@ class Params:
|
|||||||
def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
|
def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
|
||||||
config = json.load(open(config_path))
|
config = json.load(open(config_path))
|
||||||
|
|
||||||
n_vocab = config["vocab_size"] if "vocab_size" in config else -1
|
n_vocab = config["vocab_size"] if "vocab_size" in config else -1
|
||||||
n_embd = config["dim"]
|
n_embd = config["dim"]
|
||||||
n_layer = config["n_layers"]
|
n_layer = config["n_layers"]
|
||||||
n_mult = config["multiple_of"]
|
n_mult = config["multiple_of"]
|
||||||
n_ctx = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
|
n_ctx = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
|
||||||
n_ff = -1
|
n_ff = -1
|
||||||
n_head = config["n_heads"]
|
n_head = config["n_heads"]
|
||||||
n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head
|
n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head
|
||||||
f_norm_eps = config["norm_eps"]
|
f_norm_eps = config["norm_eps"]
|
||||||
|
f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
|
||||||
|
|
||||||
if n_vocab == -1:
|
if n_vocab == -1:
|
||||||
n_vocab = model["tok_embeddings.weight"].shape[0]
|
n_vocab = model["tok_embeddings.weight"].shape[0]
|
||||||
@ -211,15 +214,16 @@ class Params:
|
|||||||
n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
|
n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
|
||||||
|
|
||||||
return Params(
|
return Params(
|
||||||
n_vocab = n_vocab,
|
n_vocab = n_vocab,
|
||||||
n_embd = n_embd,
|
n_embd = n_embd,
|
||||||
n_mult = n_mult,
|
n_mult = n_mult,
|
||||||
n_layer = n_layer,
|
n_layer = n_layer,
|
||||||
n_ctx = n_ctx,
|
n_ctx = n_ctx,
|
||||||
n_ff = n_ff,
|
n_ff = n_ff,
|
||||||
n_head = n_head,
|
n_head = n_head,
|
||||||
n_head_kv = n_head_kv,
|
n_head_kv = n_head_kv,
|
||||||
f_norm_eps = f_norm_eps,
|
f_norm_eps = f_norm_eps,
|
||||||
|
f_rope_freq_base = f_rope_freq_base,
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -754,6 +758,9 @@ class OutputFile:
|
|||||||
self.gguf.add_head_count_kv (params.n_head_kv)
|
self.gguf.add_head_count_kv (params.n_head_kv)
|
||||||
self.gguf.add_layer_norm_rms_eps (params.f_norm_eps)
|
self.gguf.add_layer_norm_rms_eps (params.f_norm_eps)
|
||||||
|
|
||||||
|
if params.f_rope_freq_base:
|
||||||
|
self.gguf.add_rope_freq_base(params.f_rope_freq_base)
|
||||||
|
|
||||||
if params.ftype:
|
if params.ftype:
|
||||||
self.gguf.add_file_type(params.ftype)
|
self.gguf.add_file_type(params.ftype)
|
||||||
|
|
||||||
|
6
gguf.py
6
gguf.py
@ -47,6 +47,7 @@ KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
|
|||||||
|
|
||||||
# RoPE
|
# RoPE
|
||||||
KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
||||||
|
KEY_ROPE_FREQ_BASE = "{arch}.rope.freq_base"
|
||||||
KEY_ROPE_SCALE_LINEAR = "{arch}.rope.scale_linear"
|
KEY_ROPE_SCALE_LINEAR = "{arch}.rope.scale_linear"
|
||||||
|
|
||||||
# tokenization
|
# tokenization
|
||||||
@ -663,7 +664,10 @@ class GGUFWriter:
|
|||||||
self.add_uint32(
|
self.add_uint32(
|
||||||
KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)
|
KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)
|
||||||
|
|
||||||
def add_rope_scale_linear(self, value: float):
|
def add_rope_freq_base(self, value: float):
|
||||||
|
self.add_float32(KEY_ROPE_FREQ_BASE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_rope_scale_linear(self, value: float):
|
||||||
self.add_float32(KEY_ROPE_SCALE_LINEAR.format(arch=self.arch), value)
|
self.add_float32(KEY_ROPE_SCALE_LINEAR.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_tokenizer_model(self, model: str):
|
def add_tokenizer_model(self, model: str):
|
||||||
|
20
llama.cpp
20
llama.cpp
@ -195,6 +195,7 @@ enum llm_kv {
|
|||||||
LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
|
LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
|
||||||
|
|
||||||
LLM_KV_ROPE_DIMENSION_COUNT,
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
||||||
|
LLM_KV_ROPE_FREQ_BASE,
|
||||||
LLM_KV_ROPE_SCALE_LINEAR,
|
LLM_KV_ROPE_SCALE_LINEAR,
|
||||||
|
|
||||||
LLM_KV_TOKENIZER_MODEL,
|
LLM_KV_TOKENIZER_MODEL,
|
||||||
@ -238,6 +239,7 @@ static std::map<llm_kv, std::string> LLM_KV_NAMES = {
|
|||||||
{ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
|
{ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
|
||||||
|
|
||||||
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
||||||
|
{ LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
|
||||||
{ LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
|
{ LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
|
||||||
|
|
||||||
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
|
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
|
||||||
@ -1561,12 +1563,26 @@ static void llm_load_hparams(
|
|||||||
hparams.n_head_kv = hparams.n_head;
|
hparams.n_head_kv = hparams.n_head;
|
||||||
GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV));
|
GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV));
|
||||||
|
|
||||||
// TODO: manually setting rope scale should override this
|
// TODO: manually setting rope freq base and scale should override this
|
||||||
|
// FIXME: partial fix when the param specified is not the default value, but
|
||||||
|
// will not work for overriding the model value to the params default
|
||||||
|
|
||||||
|
llama_context_params defaults = llama_context_default_params();
|
||||||
|
|
||||||
|
// rope_freq_base
|
||||||
|
{
|
||||||
|
float ropebase = 10000.0f;
|
||||||
|
GGUF_GET_KEY(ctx, ropebase, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
|
||||||
|
if (ropebase != 10000.0f && rope_freq_base == defaults.rope_freq_base) {
|
||||||
|
rope_freq_base = ropebase;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// rope_freq_scale (inverse of the kv) is optional
|
// rope_freq_scale (inverse of the kv) is optional
|
||||||
{
|
{
|
||||||
float ropescale = 1.0f;
|
float ropescale = 1.0f;
|
||||||
GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR));
|
GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR));
|
||||||
if (ropescale != 1.0f) {
|
if (ropescale != 1.0f && rope_freq_scale == defaults.rope_freq_scale) {
|
||||||
rope_freq_scale = 1.0f/ropescale;
|
rope_freq_scale = 1.0f/ropescale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user