mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-25 10:58:56 +01:00
08a0c02060
* ggml : update mul_mat_id to use the same tensor for all the experts
* update cuda
* minor
* update metal
* update test-backend-ops
* fix cuda
* Update ggml-metal.m
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* update convert.py
* update convert-hf-to-gguf.py
* update convert.py for mixtral hf models
* Update convert-hf-to-gguf.py
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* cuda : support non-pow-2 number of experts
* allow quantize to work for split and merged experts models in the same way
* cleanup + disable mmap automatically with split tensors models
* update imatrix
* test-backend-ops : test qwen argsort
* update grok model loading
* llama : add merged experts tensors to the grok tensor map
* minor
* gguf : bump version
* fix quantizing of merged experts
* convert-hf-to-gguf.py : update grok (untested)
* make linter happy
* cuda/argsort : use shared memory instead of pool memory
* convert : fix grok tensor names
* metal : add support for non-pow-2 argsort
* llama : more loader cleanup, better error checking
* cuda : fix warning
* llama : still use mmap for loading old models, but copy the data to a host buffer
* add review note
* llama : remove ffn tensor counting + add sanity check
  ggml-ci
* convert : fix handling of n_experts == None
  ggml-ci
* imatrix : fix ncall counters
* llama : produce error if imatrix size does not match
* quantize : terminate on errors + trace logs
  ggml-ci
* metal : pad shared memory to 16 bytes

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
36 lines
921 B
TOML
36 lines
921 B
TOML
# Poetry metadata for the `gguf` Python distribution.
[tool.poetry]
name = "gguf"
version = "0.9.0"
description = "Read and write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"]
packages = [
    { include = "gguf" },
    # Listed explicitly so the py.typed marker file ships with the package.
    { include = "gguf/py.typed" },
    # Holds the `scripts:*_entrypoint` targets named in [tool.poetry.scripts].
    { include = "scripts" },
]
readme = "README.md"
homepage = "https://ggml.ai"
repository = "https://github.com/ggerganov/llama.cpp"
keywords = ["ggml", "gguf", "llama.cpp"]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

# Runtime requirements: the supported Python range and the numpy floor.
[tool.poetry.dependencies]
python = ">=3.8"
numpy = ">=1.17"

# Development-only requirements.
# NOTE(review): newer Poetry releases prefer `[tool.poetry.group.dev.dependencies]`;
# the legacy section name is kept so older Poetry versions still read it —
# confirm the minimum supported Poetry version before migrating.
[tool.poetry.dev-dependencies]
pytest = "^5.2"  # caret constraint: >=5.2, <6.0

# Build configuration: the package is built with the poetry-core backend.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

# Console commands, written as `command-name = "module:function"`.
# Every target lives in the top-level `scripts` package.
[tool.poetry.scripts]
gguf-convert-endian = "scripts:gguf_convert_endian_entrypoint"
gguf-dump = "scripts:gguf_dump_entrypoint"
gguf-set-metadata = "scripts:gguf_set_metadata_entrypoint"