mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-03 17:51:09 +01:00
convert : support safetensors format
This commit is contained in:
parent
f1cbfabd64
commit
6a419f4d19
10
convert.py
10
convert.py
@ -42,6 +42,7 @@ NDArray: TypeAlias = 'np.ndarray[Any, Any]'
|
|||||||
ARCH = gguf.MODEL_ARCH.LLAMA
|
ARCH = gguf.MODEL_ARCH.LLAMA
|
||||||
|
|
||||||
DEFAULT_CONCURRENCY = 8
|
DEFAULT_CONCURRENCY = 8
|
||||||
|
|
||||||
#
|
#
|
||||||
# data types
|
# data types
|
||||||
#
|
#
|
||||||
@ -235,6 +236,13 @@ class Params:
|
|||||||
raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
|
raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
|
||||||
"Suggestion: provide 'config.json' of the model in the same directory containing model files.")
|
"Suggestion: provide 'config.json' of the model in the same directory containing model files.")
|
||||||
|
|
||||||
|
n_experts = None
|
||||||
|
n_experts_used = None
|
||||||
|
|
||||||
|
if "num_local_experts" in config:
|
||||||
|
n_experts = config["num_local_experts"]
|
||||||
|
n_experts_used = config["num_experts_per_tok"]
|
||||||
|
|
||||||
return Params(
|
return Params(
|
||||||
n_vocab = config["vocab_size"],
|
n_vocab = config["vocab_size"],
|
||||||
n_embd = config["hidden_size"],
|
n_embd = config["hidden_size"],
|
||||||
@ -243,6 +251,8 @@ class Params:
|
|||||||
n_ff = config["intermediate_size"],
|
n_ff = config["intermediate_size"],
|
||||||
n_head = (n_head := config["num_attention_heads"]),
|
n_head = (n_head := config["num_attention_heads"]),
|
||||||
n_head_kv = config.get("num_key_value_heads", n_head),
|
n_head_kv = config.get("num_key_value_heads", n_head),
|
||||||
|
n_experts = n_experts,
|
||||||
|
n_experts_used = n_experts_used,
|
||||||
f_norm_eps = config["rms_norm_eps"],
|
f_norm_eps = config["rms_norm_eps"],
|
||||||
f_rope_freq_base = config.get("rope_theta"),
|
f_rope_freq_base = config.get("rope_theta"),
|
||||||
rope_scaling_type = rope_scaling_type,
|
rope_scaling_type = rope_scaling_type,
|
||||||
|
@ -151,6 +151,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_INP: (
|
MODEL_TENSOR.FFN_GATE_INP: (
|
||||||
"layers.{bid}.feed_forward.gate", # mixtral
|
"layers.{bid}.feed_forward.gate", # mixtral
|
||||||
|
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward up
|
# Feed-forward up
|
||||||
@ -170,6 +171,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
MODEL_TENSOR.FFN_UP_EXP: (
|
MODEL_TENSOR.FFN_UP_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.{xid}.w3", # mixtral
|
"layers.{bid}.feed_forward.experts.{xid}.w3", # mixtral
|
||||||
|
"model.layers.{bid}.block_sparse_moe.experts.{xid}.w3", # mixtral
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward gate
|
# Feed-forward gate
|
||||||
@ -181,6 +183,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_EXP: (
|
MODEL_TENSOR.FFN_GATE_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.{xid}.w1", # mixtral
|
"layers.{bid}.feed_forward.experts.{xid}.w1", # mixtral
|
||||||
|
"model.layers.{bid}.block_sparse_moe.experts.{xid}.w1", # mixtral
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward down
|
# Feed-forward down
|
||||||
@ -199,6 +202,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||||
"layers.{bid}.feed_forward.experts.{xid}.w2", # mixtral
|
"layers.{bid}.feed_forward.experts.{xid}.w2", # mixtral
|
||||||
|
"model.layers.{bid}.block_sparse_moe.experts.{xid}.w2", # mixtral
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||||
|
Loading…
Reference in New Issue
Block a user