9 Metadata Override
Brian edited this page 2024-11-21 01:40:44 +11:00

https://github.com/ggerganov/llama.cpp/pull/7499 added the --metadata option, a metadata override feature that lets you add extra metadata to the GGUF file. A metadata override file may look like the example below. (Note that the // C-style comments are not present in an actual file; they are included here for illustrative purposes only.)

{
    // Example Metadata Override Fields
    "general.name"           : "Example Model Six",
    "general.author"         : "John Smith",
    "general.version"        : "v1.0",
    "general.organization"   : "SparkExampleMind",
    "general.quantized_by"   : "Abbety Jenson",
    "general.description"    : "This is an example of a model",
    // Useful for cleanly regenerating default naming conventions
    "general.finetune"       : "instruct",
    "general.basename"       : "llamabase",
    "general.size_label"     : "8x2.3Q",
    // Licensing details
    "general.license"        : "apache-2.0",
    "general.license.name"   : "Apache License Version 2.0, January 2004",
    "general.license.link"   : "https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md",
    // Typically represents the converted GGUF repo (Unless native)
    "general.url"            : "https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-F16/blob/main/README.md",
    "general.doi"            : "doi:10.1080/02626667.2018.1560449", 
    "general.uuid"           : "f18383df-ceb9-4ef3-b929-77e4dc64787c", 
    "general.repo_url"       : "https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-F16",
    // Model Source during conversion
    "general.source.url"     : "https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-safetensor/blob/main/README.md",
    "general.source.doi"     : "doi:10.1080/02626667.2018.1560449", 
    "general.source.uuid"    : "a72998bf-3b84-4ff4-91c6-7a6b780507bc", 
    "general.source.repo_url": "https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-safetensor",
    // Model Parents (Merges, Pre-tuning, etc...)
    "general.base_models"    : [
        {
            "name": "GPT-3",
            "author": "OpenAI",
            "version": "3.0",
            "organization": "OpenAI",
            "description": "A large language model capable of performing a wide variety of language tasks.",
            "url": "https://openai.com/research/gpt-3",
            "doi": "10.5555/gpt3doi123456",
            "uuid": "123e4567-e89b-12d3-a456-426614174000",
            "repo_url": "https://github.com/openai/gpt-3"
        },
        {
            "name": "BERT",
            "author": "Google AI Language",
            "version": "1.0",
            "organization": "Google",
            "description": "A transformer-based model pretrained on English to achieve state-of-the-art performance on a range of NLP tasks.",
            "url": "https://github.com/google-research/bert",
            "doi": "10.5555/bertdoi789012",
            "uuid": "987e6543-e21a-43f3-a356-527614173999",
            "repo_url": "https://github.com/google-research/bert"
        }
    ],
    // Model Datasets Used (Training data...)
    "general.datasets": [
        {
            "name": "Wikipedia Corpus",
            "author": "Wikimedia Foundation",
            "version": "2021-06",
            "organization": "Wikimedia",
            "description": "A dataset comprising the full English Wikipedia, used to train models in a range of natural language tasks.",
            "url": "https://dumps.wikimedia.org/enwiki/",
            "doi": "10.5555/wikidoi234567",
            "uuid": "234e5678-f90a-12d3-c567-426614172345",
            "repo_url": "https://github.com/wikimedia/wikipedia-corpus"
        },
        {
            "name": "Common Crawl",
            "author": "Common Crawl Foundation",
            "version": "2021-04",
            "organization": "Common Crawl",
            "description": "A dataset containing web-crawled data from various domains, providing a broad range of text.",
            "url": "https://commoncrawl.org",
            "doi": "10.5555/ccdoi345678",
            "uuid": "345e6789-f90b-34d5-d678-426614173456",
            "repo_url": "https://github.com/commoncrawl/cc-crawl-data"
        }
    ],
    // Array Based Metadata
    "general.tags": ["text generation", "transformer", "llama", "tiny", "tiny model"],
    "general.languages": ["en"]
}