From acaa98234ac80480cd15ebf1f8e99f81cd5ec442 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 17 Aug 2023 21:06:45 +0300 Subject: [PATCH] convert.py : fix HF tensor permuting / unpacking ggml-ci --- convert.py | 20 +++++++++++++++++++- gguf.py | 6 +++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/convert.py b/convert.py index adc5fdd26..f6237579d 100755 --- a/convert.py +++ b/convert.py @@ -812,6 +812,23 @@ def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyM def convert_model_names(model: LazyModel, params: Params) -> LazyModel: tmap = gguf.get_tensor_name_map(ARCH, params.n_layer) + tmp = model + + # HF models permute or pack some of the tensors, so we need to undo that + for i in itertools.count(): + if f"model.layers.{i}.self_attn.q_proj.weight" in model: + print(f"Permuting layer {i}") + tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv) + tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv) + #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"] + elif f"model.layers.{i}.self_attn.W_pack.weight" in model: + print(f"Unpacking and permuting layer {i}") + tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv) + tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv) + tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2) + else: + break + out: LazyModel = {} for name, lazy_tensor in model.items(): name_new = name @@ -825,8 +842,9 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel: 
else: raise Exception(f"Unexpected tensor name: {name}") - if gguf.should_skip_tensor(ARCH, params.n_layer, name_new): + if gguf.should_skip_tensor_TMP(ARCH, params.n_layer, name_new): print(f"skipping tensor {name_new}") + continue else: print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type} | {lazy_tensor.shape}") out[name_new] = lazy_tensor diff --git a/gguf.py b/gguf.py index a4dd10872..2a6806a91 100644 --- a/gguf.py +++ b/gguf.py @@ -148,7 +148,11 @@ MODEL_TENSOR_SKIP = { ], } -def should_skip_tensor(arch : MODEL_ARCH, n_blocks : int, name : str) -> bool: +# TODO: the following helper functions should be removed +# instead, get_tensor_name_map should return tuples of (name, MODEL_TENSOR) +# however, my Python is very bad, and I couldn't figure out how to do this, hence these functions +# REMOVE +def should_skip_tensor_TMP(arch : MODEL_ARCH, n_blocks : int, name : str) -> bool: for skip in MODEL_TENSOR_SKIP.get(arch, []): for i in range(n_blocks): if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):