
Commit 0a11f8b

convert : fix RWKV v6 model conversion (#10913)
* Enable --no-context-shift for llama-perplexity example
* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

Signed-off-by: Molly Sophia <[email protected]>
1 parent (d408bb9) · commit 0a11f8b

File tree: 2 files changed, 4 insertions(+), 1 deletion(-)

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -626,7 +626,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions

@@ -3065,6 +3065,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if new_name.endswith("time_mix_w2.weight"):
             data_torch = data_torch.permute(0, 2, 1)
 
+        if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+            data_torch = data_torch.squeeze()
+
         rescale_every_n_layers = self.hparams["rescale_every"]
         if rescale_every_n_layers > 0:
             if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
