mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-10 12:30:50 +01:00
f98eb31c51
* convert-hf : begin refactoring write_tensor * convert : upgrade to sentencepiece v0.2.0 * convert-hf : remove unused n_dims in extra_*_tensors * convert-hf : simplify MoE weights stacking * convert-hf : flake8 linter doesn't like semicolons * convert-hf : allow unusual model part names For example, loading `model-00001-of-00001.safetensors` now works. * convert-hf : fix stacking MoE expert tensors `torch.stack` and `torch.cat` don't do the same thing. * convert-hf : fix Mamba conversion Tested to work even with a SentencePiece-based tokenizer. * convert : use a string for the SentencePiece tokenizer path * convert-hf : display tensor shape * convert-hf : convert norms to f32 by default * convert-hf : sort model part names `os.listdir` is said to list files in arbitrary order. Sorting the file names should let "model-00009-of-00042.safetensors" be loaded before "model-00010-of-00042.safetensors". * convert-hf : use an ABC for Model again It seems Protocol can't be used as a statically type-checked ABC, because its subclasses also can't be instantiated. (why did it seem to work?) At least there's still a way to throw an error when forgetting to define the `model_arch` property of any registered Model subclasses. * convert-hf : use a plain class for Model, and forbid direct instantiation There are no abstract methods used anyway, so using ABC isn't really necessary. 
* convert-hf : more consistent formatting of cmdline args * convert-hf : align the message logged for converted tensors * convert-hf : fix Refact conversion * convert-hf : save memory with lazy evaluation * convert-hf : flake8 doesn't like lowercase L as a variable name * convert-hf : remove einops requirement for InternLM2 * convert-hf : faster model parts loading Instead of pre-loading them all into a dict, iterate on the tensors in the model parts progressively as needed in Model.write_tensors Conversion for some architectures relies on checking for the presence of specific tensor names, so for multi-part models, the weight map is read from the relevant json file to quickly get these names up-front. * convert-hf : minor changes for consistency * gguf-py : add tqdm as a dependency It's small, and used for a progress bar in GGUFWriter.write_tensors_to_file
38 lines
996 B
TOML
38 lines
996 B
TOML
# Poetry metadata for the `gguf` distribution.
[tool.poetry]
|
|
name = "gguf"
|
|
version = "0.9.0"
|
|
description = "Read and write ML models in GGUF for GGML"
|
|
authors = ["GGML <ggml@ggml.ai>"]
|
|
# Ships the `gguf` package, the `gguf/py.typed` file, and a separate top-level
# `scripts` package (the target of the entry points in [tool.poetry.scripts]).
packages = [
|
|
{include = "gguf"},
|
|
{include = "gguf/py.typed"},
|
|
{include = "scripts"},
|
|
]
|
|
readme = "README.md"
|
|
homepage = "https://ggml.ai"
|
|
repository = "https://github.com/ggerganov/llama.cpp"
|
|
keywords = ["ggml", "gguf", "llama.cpp"]
|
|
classifiers = [
|
|
"Programming Language :: Python :: 3",
|
|
"License :: OSI Approved :: MIT License",
|
|
"Operating System :: OS Independent",
|
|
]
|
|
|
|
# Runtime requirements. Every constraint is a lower bound only; no upper cap
# is declared.
[tool.poetry.dependencies]
|
|
python = ">=3.8"
|
|
numpy = ">=1.17"
|
|
# Used for a progress bar in GGUFWriter.write_tensors_to_file.
tqdm = ">=4.27"
|
|
|
|
# Development-only requirements.
# NOTE(review): newer Poetry releases document
# `[tool.poetry.group.dev.dependencies]` as the successor to this table.
# Confirm tool support before migrating, since the declared build requirement
# is only `poetry-core>=1.0.0`.
[tool.poetry.dev-dependencies]
|
|
pytest = "^5.2"
|
|
|
|
# Build configuration: the package is built with poetry-core (1.0.0 or newer)
# through the `poetry.core.masonry.api` backend.
[build-system]
|
|
requires = ["poetry-core>=1.0.0"]
|
|
build-backend = "poetry.core.masonry.api"
|
|
|
|
# Command-line entry points. Each command name maps to a `<module>:<function>`
# reference; all four point at functions in the top-level `scripts` package.
[tool.poetry.scripts]
|
|
gguf-convert-endian = "scripts:gguf_convert_endian_entrypoint"
|
|
gguf-dump = "scripts:gguf_dump_entrypoint"
|
|
gguf-set-metadata = "scripts:gguf_set_metadata_entrypoint"
|
|
gguf-new-metadata = "scripts:gguf_new_metadata_entrypoint"
|