From 9d3f44dad426acc26d35e3b6cf1462d3a3f43113 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Sun, 1 Sep 2024 21:46:27 -0400
Subject: [PATCH] convert_hf : fix Jamba conversion

---
 convert_hf_to_gguf.py | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 00059bd01..e9bb4b20b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2910,7 +2910,6 @@ class JambaModel(Model):
             n_kv_head if (i - attn_offset) % attn_period == 0 else 0
             for i in range(attn_offset, self.block_count)
         ]
-        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.block_count)
         self.gguf_writer.add_context_length(self.find_hparam(["max_position_embeddings", "n_ctx"]))
         self.gguf_writer.add_embedding_length(d_model)
@@ -2979,8 +2978,8 @@ class JambaModel(Model):
 
         yield new_name, data_torch
 
-    def write_tensors(self):
-        super().write_tensors()
+    def prepare_tensors(self):
+        super().prepare_tensors()
 
         if self._experts is not None:
             # flatten `list[dict[str, Tensor]]` into `list[str]`
@@ -2988,20 +2987,6 @@ class JambaModel(Model):
             if len(experts) > 0:
                 raise ValueError(f"Unprocessed experts: {experts}")
 
-    # same as Mamba
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del n_dims  # unused
-
-        return bid is not None and new_name in (
-            self.format_tensor_name(n, bid, ".weight" if name.endswith(".weight") else "") for n in [
-                gguf.MODEL_TENSOR.SSM_CONV1D,
-                gguf.MODEL_TENSOR.SSM_X,
-                gguf.MODEL_TENSOR.SSM_DT,
-                gguf.MODEL_TENSOR.SSM_A,
-                gguf.MODEL_TENSOR.SSM_D,
-            ]
-        )
-
 
 @Model.register("CohereForCausalLM")
 class CommandR2Model(Model):