mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
convert-hf : match model part name prefix and suffix (#7687)
In #7075, to fix the conversion of (some) models using model-00001-of-00001.safetensors instead of model.safetensors for a single model part we simply used the same logic as the part count to get the part names. But this doesn't always work correctly, like when unusual additional model files like consolidated.safetensors in https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3 are present. This commit matching both the prefix and the suffix of the model part names should fix this problem without breaking any previously-supported upstream models. But according to report by @teleprint-me there is still some persistent problem, but shall do in the meantime.
This commit is contained in:
parent
ed9f252118
commit
5795b94182
@ -73,10 +73,10 @@ class Model:
|
|||||||
self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
|
self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
|
||||||
self.use_temp_file = use_temp_file
|
self.use_temp_file = use_temp_file
|
||||||
self.lazy = not eager
|
self.lazy = not eager
|
||||||
self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors")
|
self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
|
||||||
self.is_safetensors = len(self.part_names) > 0
|
self.is_safetensors = len(self.part_names) > 0
|
||||||
if not self.is_safetensors:
|
if not self.is_safetensors:
|
||||||
self.part_names = Model.get_model_part_names(self.dir_model, ".bin")
|
self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
|
||||||
self.hparams = Model.load_hparams(self.dir_model)
|
self.hparams = Model.load_hparams(self.dir_model)
|
||||||
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
|
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
|
||||||
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
|
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
|
||||||
@ -335,10 +335,10 @@ class Model:
|
|||||||
self.gguf_writer.close()
|
self.gguf_writer.close()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_model_part_names(dir_model: Path, suffix: str) -> list[str]:
|
def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
|
||||||
part_names: list[str] = []
|
part_names: list[str] = []
|
||||||
for filename in os.listdir(dir_model):
|
for filename in os.listdir(dir_model):
|
||||||
if filename.endswith(suffix):
|
if filename.startswith(prefix) and filename.endswith(suffix):
|
||||||
part_names.append(filename)
|
part_names.append(filename)
|
||||||
|
|
||||||
part_names.sort()
|
part_names.sort()
|
||||||
|
Loading…
Reference in New Issue
Block a user