Updated Metadata Override (markdown)

2024-11-22 08:17:58 +01:00 · 2024-11-18 01:07:44 +11:00 · 2024-11-18 01:07:44 +11:00 · eb98a44bb0
commit eb98a44bb0
parent 584c3525b5
1 changed file with 47 additions and 11 deletions
--- a/Metadata-Override.md
+++ b/Metadata-Override.md
@@ -31,19 +31,55 @@ https://github.com/ggerganov/llama.cpp/pull/7499 was added which adds `--metadat
    // Model Parents (Merges, Pre-tuning, etc...)
    "general.base_models"    : [
        {
-            "name" : "base model example" ,
+            "name": "GPT-3",
-            "author" : "example parent" ,
+            "author": "OpenAI",
-            "version" : "v3.2" ,
+            "version": "3.0",
-            "organization" : "grandOldMaster" ,
+            "organization": "OpenAI",
-            "url" : "https://huggingface.co/SparkExampleMind/parentlalama-1Q-v1.0-safetensor/blob/main/README.md",
+            "description": "A large language model capable of performing a wide variety of language tasks.",
-            "doi" : "doi:10.1080/02626667.2018.1560449",
+            "url": "https://openai.com/research/gpt-3",
-            "uuid" : "52d8c7ef-1de5-43f1-87a4-0c7c9c3d07c4" ,
+            "doi": "10.5555/gpt3doi123456",
-            "repo_url" : "https://huggingface.co/SparkExampleMind/parentlalama-1Q-v1.0-safetensor"
+            "uuid": "123e4567-e89b-12d3-a456-426614174000",
            "repo_url": "https://github.com/openai/gpt-3"
        },
        {
            "name": "BERT",
            "author": "Google AI Language",
            "version": "1.0",
            "organization": "Google",
            "description": "A transformer-based model pretrained on English to achieve state-of-the-art performance on a range of NLP tasks.",
            "url": "https://github.com/google-research/bert",
            "doi": "10.5555/bertdoi789012",
            "uuid": "987e6543-e21a-43f3-a356-527614173999",
            "repo_url": "https://github.com/google-research/bert"
        }
    ],
-    // Array based metadata
+    // Model Datasets Used (Training data...)
    "general.datasets": [
        {
            "name": "Wikipedia Corpus",
            "author": "Wikimedia Foundation",
            "version": "2021-06",
            "organization": "Wikimedia",
            "description": "A dataset comprising the full English Wikipedia, used to train models in a range of natural language tasks.",
            "url": "https://dumps.wikimedia.org/enwiki/",
            "doi": "10.5555/wikidoi234567",
            "uuid": "234e5678-f90a-12d3-c567-426614172345",
            "repo_url": "https://github.com/wikimedia/wikipedia-corpus"
        },
        {
            "name": "Common Crawl",
            "author": "Common Crawl Foundation",
            "version": "2021-04",
            "organization": "Common Crawl",
            "description": "A dataset containing web-crawled data from various domains, providing a broad range of text.",
            "url": "https://commoncrawl.org",
            "doi": "10.5555/ccdoi345678",
            "uuid": "345e6789-f90b-34d5-d678-426614173456",
            "repo_url": "https://github.com/commoncrawl/cc-crawl-data"
        }
    ],
    // Array Based Metadata
    "general.tags": ["text generation", "transformer", "llama", "tiny", "tiny model"],
-    "general.languages": ["en"],
+    "general.languages": ["en"],
    "general.datasets": ["https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-train.txt", "https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-valid.txt"]
 }
 ```