
Commit 0a11f8b

convert : fix RWKV v6 model conversion (#10913)
* Enable --no-context-shift for llama-perplexity example
* RWKV 6: Fix error in ggml_cuda_op_bin_bcast

Signed-off-by: Molly Sophia <[email protected]>
1 parent (d408bb9) · commit 0a11f8b

File tree: 2 files changed, 4 insertions(+), 1 deletion(-)

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -626,7 +626,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions

@@ -3065,6 +3065,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if new_name.endswith("time_mix_w2.weight"):
             data_torch = data_torch.permute(0, 2, 1)
 
+        if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+            data_torch = data_torch.squeeze()
+
         rescale_every_n_layers = self.hparams["rescale_every"]
         if rescale_every_n_layers > 0:
             if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
