mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-30 16:07:17 +01:00
328884f421
* gguf-py : fix some metadata name extraction edge cases * convert_lora : use the lora dir for the model card path * gguf-py : more metadata edge cases fixes Multiple finetune versions are now joined together, and the removal of the basename annotation on trailing versions is more robust. * gguf-py : add more name metadata extraction tests * convert_lora : fix default filename The default filename was previously hardcoded. * convert_hf : Model.fname_out can no longer be None * gguf-py : do not use title case for naming convention Some models use acronyms in lowercase, which can't be title-cased like other words, so it's best to simply use the same case as in the original model name. Note that the size label still has an uppercased suffix to make it distinguishable from the context size of a finetune.
70 lines
2.9 KiB
Python
70 lines
2.9 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Literal
|
|
|
|
|
|
def fill_templated_filename(filename: str, output_type: str | None) -> str:
    """Expand output-type placeholders in a file name template.

    Example template: 'some-model-name.{ftype}.gguf'.

    Supported placeholders:
      * ``{}`` (positional), ``{outtype}``, ``{ftype}`` -> lowercased output type
      * ``{OUTTYPE}``, ``{FTYPE}``                      -> uppercased output type

    When ``output_type`` is None every placeholder expands to an empty string.
    """
    lowered: str = "" if output_type is None else output_type.lower()
    uppered: str = "" if output_type is None else output_type.upper()

    # Keyword substitutions understood by the template.
    substitutions = {
        "outtype": lowered,
        "ftype": lowered,
        "OUTTYPE": uppered,
        "FTYPE": uppered,
    }
    return filename.format(lowered, **substitutions)
|
|
|
|
|
|
def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
    """Format a parameter count as a short label with a K/M/B/T suffix.

    The count is scaled to the largest unit it reaches (thousands are the
    floor, so small counts are still expressed in "K") and printed with just
    enough decimals to show at least ``min_digits`` digits in total.

    Args:
        model_params_count: Number of parameters to describe.
        min_digits: Minimum number of digits to show; decimals are added only
            when the integer part has fewer digits than this.

    Returns:
        The scaled value followed by its unit suffix, e.g. "7.0B" or "124M".
    """
    # Inclusive lower bounds: a count of exactly 1e9 is "1.0B", not "1000M".
    if model_params_count >= 1e12:
        # Trillions Of Parameters
        scaled_model_params = model_params_count * 1e-12
        scale_suffix = "T"
    elif model_params_count >= 1e9:
        # Billions Of Parameters
        scaled_model_params = model_params_count * 1e-9
        scale_suffix = "B"
    elif model_params_count >= 1e6:
        # Millions Of Parameters
        scaled_model_params = model_params_count * 1e-6
        scale_suffix = "M"
    else:
        # Thousands Of Parameters
        scaled_model_params = model_params_count * 1e-3
        scale_suffix = "K"

    # Decimals to print = digits still missing after the integer part.
    # A rounded integer part of 0 counts as zero digits (the '0' is stripped).
    fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)

    return f"{scaled_model_params:.{fix}f}{scale_suffix}"
|
|
|
|
|
|
def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
    """Build the size label for a model from its parameter counts.

    With no experts (``expert_count`` not positive) the label is the rounded
    notation of ``abs(total_params)``. Otherwise it is
    ``"{expert_count}x{size}"``, where ``size`` is the rounded notation of
    ``abs(shared_params) + abs(expert_params)``.
    """
    if expert_count <= 0:
        # No experts: label comes from the total parameter count alone.
        return model_weight_count_rounded_notation(abs(total_params), min_digits=2)

    combined_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
    return f"{expert_count}x{combined_size}"
|
|
|
|
|
|
def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
    """Assemble a GGUF file name stem from its optional components.

    Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

    Components are joined with '-' in this order: name, size label, finetune,
    version, output type (uppercased), model type. Any component passed as
    None is omitted. The name comes from ``base_name`` if given, else from
    ``model_name``, else it is "ggml-model".
    """
    def _dashed(text: str) -> str:
        # Trim surrounding whitespace and turn inner spaces into dashes.
        return text.strip().replace(' ', '-')

    # Name: base name wins over model name; '/' is replaced with '-' as well.
    if base_name is not None:
        name = _dashed(base_name).replace('/', '-')
    elif model_name is not None:
        name = _dashed(model_name).replace('/', '-')
    else:
        name = "ggml-model"

    segments = [name]

    # The size label is used as given (no normalisation).
    if size_label is not None:
        segments.append(f"-{size_label}")

    if finetune_string is not None:
        segments.append(f"-{_dashed(finetune_string)}")

    if version_string is not None:
        segments.append(f"-{_dashed(version_string)}")

    # The encoding is always upper-cased.
    if output_type is not None:
        segments.append(f"-{_dashed(output_type).upper()}")

    if model_type is not None:
        segments.append(f"-{_dashed(model_type)}")

    return "".join(segments)
|