mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 13:58:46 +01:00
convert : fix RWKV v6 model conversion (#10913)
* Enable --no-context-shift for llama-perplexity example

  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
parent
d408bb9268
commit
0a11f8b7b5
@ -626,7 +626,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||||||
[](common_params & params) {
|
[](common_params & params) {
|
||||||
params.ctx_shift = false;
|
params.ctx_shift = false;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
|
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--chunks"}, "N",
|
{"--chunks"}, "N",
|
||||||
string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
|
string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
|
||||||
|
@ -3065,6 +3065,9 @@ class Rwkv6Model(Model):
|
|||||||
if new_name.endswith("time_mix_w2.weight"):
|
if new_name.endswith("time_mix_w2.weight"):
|
||||||
data_torch = data_torch.permute(0, 2, 1)
|
data_torch = data_torch.permute(0, 2, 1)
|
||||||
|
|
||||||
|
if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
|
||||||
|
data_torch = data_torch.squeeze()
|
||||||
|
|
||||||
rescale_every_n_layers = self.hparams["rescale_every"]
|
rescale_every_n_layers = self.hparams["rescale_every"]
|
||||||
if rescale_every_n_layers > 0:
|
if rescale_every_n_layers > 0:
|
||||||
if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
|
if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
|
||||||
|
Loading…
Reference in New Issue
Block a user