convert-hf : make model class definitions self-contained

This commit is contained in:
Jared Van Bortel 2024-03-01 15:52:37 -05:00
parent 95845d17ec
commit 7f0a1d66b5

View File

@ -8,9 +8,10 @@ import json
import os import os
import re import re
import sys import sys
from abc import ABC, abstractmethod
from enum import IntEnum from enum import IntEnum
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING, Any, ContextManager, Iterator, Sequence, cast from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterator, Sequence, TypeVar, cast
import numpy as np import numpy as np
import torch import torch
@ -35,8 +36,11 @@ class SentencePieceTokenTypes(IntEnum):
UNUSED = 5 UNUSED = 5
BYTE = 6 BYTE = 6
AnyModel = TypeVar("AnyModel", bound="type[Model]")
class Model(ABC):
_MODEL_CLASSES: dict[str, type[Model]] = {}
class Model:
def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian: bool): def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian: bool):
self.dir_model = dir_model self.dir_model = dir_model
self.ftype = ftype self.ftype = ftype
@ -47,10 +51,14 @@ class Model:
self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin") self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
self.part_names = self._get_part_names() self.part_names = self._get_part_names()
self.hparams = Model.load_hparams(self.dir_model) self.hparams = Model.load_hparams(self.dir_model)
self.model_arch = self._get_model_architecture()
self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False) self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"]) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
@property
@abstractmethod
def model_arch(self) -> gguf.MODEL_ARCH:
pass
def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any: def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any:
key = next((k for k in keys if k in self.hparams), None) key = next((k for k in keys if k in self.hparams), None)
if key is not None: if key is not None:
@ -176,55 +184,21 @@ class Model:
with open(dir_model / "config.json", "r", encoding="utf-8") as f: with open(dir_model / "config.json", "r", encoding="utf-8") as f:
return json.load(f) return json.load(f)
@staticmethod @classmethod
def from_model_architecture(model_architecture): def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
if model_architecture == "GPTNeoXForCausalLM": assert names
return GPTNeoXModel def func(modelcls: type[Model]):
if model_architecture == "BloomForCausalLM": for name in names:
return BloomModel cls._MODEL_CLASSES[name] = modelcls
if model_architecture == "MPTForCausalLM": return modelcls
return MPTModel return func
if model_architecture in ("BaichuanForCausalLM", "BaiChuanForCausalLM"):
return BaichuanModel @classmethod
if model_architecture in ("FalconForCausalLM", "RWForCausalLM"): def from_model_architecture(cls, arch):
return FalconModel try:
if model_architecture == "GPTBigCodeForCausalLM": return cls._MODEL_CLASSES[arch]
return StarCoderModel except KeyError:
if model_architecture == "GPTRefactForCausalLM": raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
return RefactModel
if model_architecture == "PersimmonForCausalLM":
return PersimmonModel
if model_architecture in ("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return StableLMModel
if model_architecture == "QWenLMHeadModel":
return QwenModel
if model_architecture == "Qwen2ForCausalLM":
return Model
if model_architecture == "MixtralForCausalLM":
return MixtralModel
if model_architecture == "GPT2LMHeadModel":
return GPT2Model
if model_architecture == "PhiForCausalLM":
return Phi2Model
if model_architecture == "PlamoForCausalLM":
return PlamoModel
if model_architecture == "CodeShellForCausalLM":
return CodeShellModel
if model_architecture == "OrionForCausalLM":
return OrionModel
if model_architecture == "InternLM2ForCausalLM":
return InternLM2Model
if model_architecture == "MiniCPMForCausalLM":
return MiniCPMModel
if model_architecture == "BertModel":
return BertModel
if model_architecture == "NomicBertModel":
return NomicBertModel
if model_architecture == "GemmaForCausalLM":
return GemmaModel
if model_architecture == "Starcoder2ForCausalLM":
return Model
return Model
def _is_model_safetensors(self) -> bool: def _is_model_safetensors(self) -> bool:
return Model.count_model_parts(self.dir_model, ".safetensors") > 0 return Model.count_model_parts(self.dir_model, ".safetensors") > 0
@ -239,57 +213,6 @@ class Model:
return ("pytorch_model.bin",) return ("pytorch_model.bin",)
return (f"pytorch_model-{n:05}-of-{self.num_parts:05}.bin" for n in range(1, self.num_parts + 1)) return (f"pytorch_model-{n:05}-of-{self.num_parts:05}.bin" for n in range(1, self.num_parts + 1))
def _get_model_architecture(self) -> gguf.MODEL_ARCH:
arch = self.hparams["architectures"][0]
if arch == "GPTNeoXForCausalLM":
return gguf.MODEL_ARCH.GPTNEOX
if arch == "BloomForCausalLM":
return gguf.MODEL_ARCH.BLOOM
if arch == "MPTForCausalLM":
return gguf.MODEL_ARCH.MPT
if arch in ("BaichuanForCausalLM", "BaiChuanForCausalLM"):
return gguf.MODEL_ARCH.BAICHUAN
if arch in ("FalconForCausalLM", "RWForCausalLM"):
return gguf.MODEL_ARCH.FALCON
if arch == "GPTBigCodeForCausalLM":
return gguf.MODEL_ARCH.STARCODER
if arch == "GPTRefactForCausalLM":
return gguf.MODEL_ARCH.REFACT
if arch == "PersimmonForCausalLM":
return gguf.MODEL_ARCH.PERSIMMON
if arch in ("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return gguf.MODEL_ARCH.STABLELM
if arch == "QWenLMHeadModel":
return gguf.MODEL_ARCH.QWEN
if arch == "Qwen2ForCausalLM":
return gguf.MODEL_ARCH.QWEN2
if arch == "MixtralForCausalLM":
return gguf.MODEL_ARCH.LLAMA
if arch == "GPT2LMHeadModel":
return gguf.MODEL_ARCH.GPT2
if arch == "PhiForCausalLM":
return gguf.MODEL_ARCH.PHI2
if arch == "PlamoForCausalLM":
return gguf.MODEL_ARCH.PLAMO
if arch == "CodeShellForCausalLM":
return gguf.MODEL_ARCH.CODESHELL
if arch == "OrionForCausalLM":
return gguf.MODEL_ARCH.ORION
if arch == "InternLM2ForCausalLM":
return gguf.MODEL_ARCH.INTERNLM2
if arch == "MiniCPMForCausalLM":
return gguf.MODEL_ARCH.MINICPM
if arch == "BertModel":
return gguf.MODEL_ARCH.BERT
if arch == "NomicBertModel":
return gguf.MODEL_ARCH.NOMIC_BERT
if arch == "GemmaForCausalLM":
return gguf.MODEL_ARCH.GEMMA
if arch == "Starcoder2ForCausalLM":
return gguf.MODEL_ARCH.STARCODER2
raise NotImplementedError(f'Architecture "{arch}" not supported!')
def _set_vocab_gpt2(self): def _set_vocab_gpt2(self):
dir_model = self.dir_model dir_model = self.dir_model
hparams = self.hparams hparams = self.hparams
@ -457,7 +380,10 @@ class Model:
special_vocab.add_to_gguf(self.gguf_writer) special_vocab.add_to_gguf(self.gguf_writer)
@Model.register("GPTNeoXForCausalLM")
class GPTNeoXModel(Model): class GPTNeoXModel(Model):
model_arch = gguf.MODEL_ARCH.GPTNEOX
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams["num_hidden_layers"] block_count = self.hparams["num_hidden_layers"]
@ -474,7 +400,10 @@ class GPTNeoXModel(Model):
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"]) self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
@Model.register("BloomForCausalLM")
class BloomModel(Model): class BloomModel(Model):
model_arch = gguf.MODEL_ARCH.BLOOM
def set_gguf_parameters(self): def set_gguf_parameters(self):
self.gguf_writer.add_name("Bloom") self.gguf_writer.add_name("Bloom")
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed")) n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
@ -566,7 +495,10 @@ class BloomModel(Model):
print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
@Model.register("MPTForCausalLM")
class MPTModel(Model): class MPTModel(Model):
model_arch = gguf.MODEL_ARCH.MPT
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams["n_layers"] block_count = self.hparams["n_layers"]
self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_name(self.dir_model.name)
@ -629,7 +561,10 @@ class MPTModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("OrionForCausalLM")
class OrionModel(Model): class OrionModel(Model):
model_arch = gguf.MODEL_ARCH.ORION
def set_vocab(self): def set_vocab(self):
self._set_vocab_sentencepiece() self._set_vocab_sentencepiece()
@ -708,7 +643,10 @@ class OrionModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("BaichuanForCausalLM", "BaiChuanForCausalLM")
class BaichuanModel(Model): class BaichuanModel(Model):
model_arch = gguf.MODEL_ARCH.BAICHUAN
def set_vocab(self): def set_vocab(self):
self._set_vocab_sentencepiece() self._set_vocab_sentencepiece()
@ -823,7 +761,10 @@ class BaichuanModel(Model):
return weights[r * n_part:r * n_part + r, ...] return weights[r * n_part:r * n_part + r, ...]
@Model.register("FalconForCausalLM", "RWForCausalLM")
class FalconModel(Model): class FalconModel(Model):
model_arch = gguf.MODEL_ARCH.FALCON
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams.get("num_hidden_layers") block_count = self.hparams.get("num_hidden_layers")
if block_count is None: if block_count is None:
@ -916,7 +857,10 @@ class FalconModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("GPTBigCodeForCausalLM")
class StarCoderModel(Model): class StarCoderModel(Model):
model_arch = gguf.MODEL_ARCH.STARCODER
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams["n_layer"] block_count = self.hparams["n_layer"]
@ -931,7 +875,10 @@ class StarCoderModel(Model):
self.gguf_writer.add_file_type(self.ftype) self.gguf_writer.add_file_type(self.ftype)
@Model.register("GPTRefactForCausalLM")
class RefactModel(Model): class RefactModel(Model):
model_arch = gguf.MODEL_ARCH.REFACT
def set_gguf_parameters(self): def set_gguf_parameters(self):
hidden_dim = self.hparams["n_embd"] hidden_dim = self.hparams["n_embd"]
inner_dim = 4 * hidden_dim inner_dim = 4 * hidden_dim
@ -1015,7 +962,10 @@ class RefactModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("PersimmonForCausalLM")
class PersimmonModel(Model): class PersimmonModel(Model):
model_arch = gguf.MODEL_ARCH.PERSIMMON
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams.get("num_layers", self.hparams.get("num_hidden_layers")) block_count = self.hparams.get("num_layers", self.hparams.get("num_hidden_layers"))
head_count = self.hparams["num_attention_heads"] head_count = self.hparams["num_attention_heads"]
@ -1063,7 +1013,10 @@ class PersimmonModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
class StableLMModel(Model): class StableLMModel(Model):
model_arch = gguf.MODEL_ARCH.STABLELM
def set_vocab(self): def set_vocab(self):
if (self.dir_model / "tokenizer.json").is_file(): if (self.dir_model / "tokenizer.json").is_file():
self._set_vocab_gpt2() self._set_vocab_gpt2()
@ -1087,12 +1040,18 @@ class StableLMModel(Model):
self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"])) self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
@Model.register("MixtralForCausalLM")
class MixtralModel(Model): class MixtralModel(Model):
model_arch = gguf.MODEL_ARCH.LLAMA
def set_vocab(self): def set_vocab(self):
self._set_vocab_sentencepiece() self._set_vocab_sentencepiece()
@Model.register("MiniCPMForCausalLM")
class MiniCPMModel(Model): class MiniCPMModel(Model):
model_arch = gguf.MODEL_ARCH.MINICPM
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams["num_hidden_layers"] block_count = self.hparams["num_hidden_layers"]
self.gguf_writer.add_name("MiniCPM") self.gguf_writer.add_name("MiniCPM")
@ -1169,7 +1128,10 @@ class MiniCPMModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("QWenLMHeadModel")
class QwenModel(Model): class QwenModel(Model):
model_arch = gguf.MODEL_ARCH.QWEN
@staticmethod @staticmethod
def token_bytes_to_string(b): def token_bytes_to_string(b):
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
@ -1249,7 +1211,15 @@ class QwenModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("Qwen2ForCausalLM")
class Qwen2Model(Model):
model_arch = gguf.MODEL_ARCH.QWEN2
@Model.register("GPT2LMHeadModel")
class GPT2Model(Model): class GPT2Model(Model):
model_arch = gguf.MODEL_ARCH.GPT2
def set_gguf_parameters(self): def set_gguf_parameters(self):
self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_name(self.dir_model.name)
self.gguf_writer.add_block_count(self.hparams["n_layer"]) self.gguf_writer.add_block_count(self.hparams["n_layer"])
@ -1311,7 +1281,10 @@ class GPT2Model(Model):
self.gguf_writer.add_tensor("output.weight", data) self.gguf_writer.add_tensor("output.weight", data)
@Model.register("PhiForCausalLM")
class Phi2Model(Model): class Phi2Model(Model):
model_arch = gguf.MODEL_ARCH.PHI2
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.find_hparam(["num_hidden_layers", "n_layer"]) block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
@ -1333,7 +1306,10 @@ class Phi2Model(Model):
self.gguf_writer.add_add_bos_token(False) self.gguf_writer.add_add_bos_token(False)
@Model.register("PlamoForCausalLM")
class PlamoModel(Model): class PlamoModel(Model):
model_arch = gguf.MODEL_ARCH.PLAMO
def set_vocab(self): def set_vocab(self):
self._set_vocab_sentencepiece() self._set_vocab_sentencepiece()
@ -1412,7 +1388,10 @@ class PlamoModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("CodeShellForCausalLM")
class CodeShellModel(Model): class CodeShellModel(Model):
model_arch = gguf.MODEL_ARCH.CODESHELL
def set_gguf_parameters(self): def set_gguf_parameters(self):
block_count = self.hparams["n_layer"] block_count = self.hparams["n_layer"]
@ -1477,7 +1456,10 @@ class CodeShellModel(Model):
print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
@Model.register("InternLM2ForCausalLM")
class InternLM2Model(Model): class InternLM2Model(Model):
model_arch = gguf.MODEL_ARCH.INTERNLM2
def set_vocab(self): def set_vocab(self):
# (TODO): Is there a better way? # (TODO): Is there a better way?
# Copy from _set_vocab_sentencepiece, The only difference is that we will treat the character # Copy from _set_vocab_sentencepiece, The only difference is that we will treat the character
@ -1649,7 +1631,10 @@ in chat mode so that the conversation can end normally.")
self.post_write_tensors(tensor_map, name, data_torch) self.post_write_tensors(tensor_map, name, data_torch)
@Model.register("BertModel")
class BertModel(Model): class BertModel(Model):
model_arch = gguf.MODEL_ARCH.BERT
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.vocab_size = None self.vocab_size = None
@ -1755,7 +1740,10 @@ class BertModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("NomicBertModel")
class NomicBertModel(BertModel): class NomicBertModel(BertModel):
model_arch = gguf.MODEL_ARCH.NOMIC_BERT
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@ -1792,7 +1780,10 @@ class NomicBertModel(BertModel):
yield name, data yield name, data
@Model.register("GemmaForCausalLM")
class GemmaModel(Model): class GemmaModel(Model):
model_arch = gguf.MODEL_ARCH.GEMMA
def set_vocab(self): def set_vocab(self):
self._set_vocab_sentencepiece() self._set_vocab_sentencepiece()
@ -1848,6 +1839,11 @@ class GemmaModel(Model):
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@Model.register("Starcoder2ForCausalLM")
class StarCoder2Model(Model):
model_arch = gguf.MODEL_ARCH.STARCODER2
###### CONVERSION LOGIC ###### ###### CONVERSION LOGIC ######