From 22c61c5b45a55e789209414b9d4f151d08fbfecf Mon Sep 17 00:00:00 2001
From: M. Yusuf Sarıgöz
Date: Thu, 17 Aug 2023 19:05:43 +0300
Subject: [PATCH] gguf : style fixes in simple conversion script

---
 convert-llama-h5-to-gguf.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
index d63893351..885dd640a 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -23,6 +23,7 @@ NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
 def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
     if n_kv_head is not None and n_head != n_kv_head:
         n_head //= n_kv_head
+
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
             .swapaxes(1, 2)
             .reshape(weights.shape))
@@ -30,12 +31,14 @@ def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] =
 
 def count_model_parts(dir_model: str) -> int:
     num_parts = 0
+
     for filename in os.listdir(dir_model):
         if filename.startswith("pytorch_model-"):
             num_parts += 1
 
     if num_parts > 0:
         print("gguf: found " + str(num_parts) + " model parts")
+
     return num_parts
 
 
@@ -43,6 +46,7 @@ if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
     print("  ftype == 0 -> float32")
     print("  ftype == 1 -> float16")
+
     sys.exit(1)
 
 
@@ -54,7 +58,8 @@ last_dir = os.path.basename(os.path.normpath(dir_model))
 # possible tensor data types
 # ftype == 0 -> float32
 # ftype == 1 -> float16
-#
+
+
 # map from ftype to string
 ftype_str = ["f32", "f16"]
 
@@ -63,6 +68,7 @@ if len(sys.argv) > 2:
     ftype = int(sys.argv[2])
     if ftype < 0 or ftype > 1:
         print("Invalid ftype: " + str(ftype))
+
         sys.exit(1)
 
 fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
@@ -74,12 +80,13 @@ with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
 
 if hparams["architectures"][0] != "LlamaForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
+
     sys.exit()
 
 # get number of model parts
 num_parts = count_model_parts(dir_model)
 
-gguf_writer = gguf.GGUFWriter(fname_out, architecture="llama")
+gguf_writer = gguf.GGUFWriter(fname_out, arch="llama")
 
 print("gguf: get model metadata")
 
@@ -103,12 +110,12 @@ elif "max_position_embeddings" in hparams:
     ctx_length = hparams["max_position_embeddings"]
 else:
     print("gguf: can not find ctx length parameter.")
+
     sys.exit()
 
 
 gguf_writer.add_architecture()
 gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type("All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
 gguf_writer.add_source_hf_repo(hf_repo)
 gguf_writer.add_tensor_data_layout("Meta AI original pth")
 gguf_writer.add_context_length(ctx_length)
@@ -247,6 +254,7 @@ for part_name in part_names:
             name = tensor_map[name[:-5]] + ".bias"
         else:
            print("Can not map tensor '" + name + "'")
+
            sys.exit()
 
     n_dims = len(data.shape)
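
Background note on reverse_hf_permute (not part of the commit): the Hugging Face
export interleaves the two rotary halves of each attention head in the wq/wk
weights, and this helper undoes that interleaving. Below is a minimal round-trip
sketch, assuming the forward permutation has the shape used by the companion
convert.py; the hf_permute name and the test dimensions are illustrative, not
from the patch.

import numpy as np

def hf_permute(w: np.ndarray, n_head: int) -> np.ndarray:
    # Forward permutation (assumed form): split each head's rows into two
    # rotary halves and interleave them.
    return (w.reshape(n_head, w.shape[0] // n_head // 2, 2, *w.shape[1:])
            .swapaxes(1, 2)
            .reshape(w.shape))

def reverse_hf_permute(w: np.ndarray, n_head: int) -> np.ndarray:
    # Inverse permutation, as in the patched script (n_kv_head handling omitted).
    return (w.reshape(n_head, 2, w.shape[0] // n_head // 2, *w.shape[1:])
            .swapaxes(1, 2)
            .reshape(w.shape))

# The round trip is the identity, e.g. for 8 heads of dimension 8:
w = np.arange(64 * 16, dtype=np.float32).reshape(64, 16)
assert np.array_equal(reverse_hf_permute(hf_permute(w, 8), 8), w)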