From eb98a44bb07e6d4bd027362d751d0c7b76905256 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 18 Nov 2024 01:07:44 +1100 Subject: [PATCH] Updated Metadata Override (markdown) --- Metadata-Override.md | 58 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/Metadata-Override.md b/Metadata-Override.md index ab819b9..4a028df 100644 --- a/Metadata-Override.md +++ b/Metadata-Override.md @@ -31,19 +31,55 @@ https://github.com/ggerganov/llama.cpp/pull/7499 was added which adds `--metadat // Model Parents (Merges, Pre-tuning, etc...) "general.base_models" : [ { - "name" : "base model example" , - "author" : "example parent" , - "version" : "v3.2" , - "organization" : "grandOldMaster" , - "url" : "https://huggingface.co/SparkExampleMind/parentlalama-1Q-v1.0-safetensor/blob/main/README.md", - "doi" : "doi:10.1080/02626667.2018.1560449", - "uuid" : "52d8c7ef-1de5-43f1-87a4-0c7c9c3d07c4" , - "repo_url" : "https://huggingface.co/SparkExampleMind/parentlalama-1Q-v1.0-safetensor" + "name": "GPT-3", + "author": "OpenAI", + "version": "3.0", + "organization": "OpenAI", + "description": "A large language model capable of performing a wide variety of language tasks.", + "url": "https://openai.com/research/gpt-3", + "doi": "10.5555/gpt3doi123456", + "uuid": "123e4567-e89b-12d3-a456-426614174000", + "repo_url": "https://github.com/openai/gpt-3" + }, + { + "name": "BERT", + "author": "Google AI Language", + "version": "1.0", + "organization": "Google", + "description": "A transformer-based model pretrained on English to achieve state-of-the-art performance on a range of NLP tasks.", + "url": "https://github.com/google-research/bert", + "doi": "10.5555/bertdoi789012", + "uuid": "987e6543-e21a-43f3-a356-527614173999", + "repo_url": "https://github.com/google-research/bert" } ], - // Array based metadata + // Model Datasets Used (Training data...) 
+ "general.datasets": [ + { + "name": "Wikipedia Corpus", + "author": "Wikimedia Foundation", + "version": "2021-06", + "organization": "Wikimedia", + "description": "A dataset comprising the full English Wikipedia, used to train models for a range of natural language tasks.", + "url": "https://dumps.wikimedia.org/enwiki/", + "doi": "10.5555/wikidoi234567", + "uuid": "234e5678-f90a-12d3-c567-426614172345", + "repo_url": "https://github.com/wikimedia/wikipedia-corpus" + }, + { + "name": "Common Crawl", + "author": "Common Crawl Foundation", + "version": "2021-04", + "organization": "Common Crawl", + "description": "A dataset containing web-crawled data from various domains, providing a broad range of text.", + "url": "https://commoncrawl.org", + "doi": "10.5555/ccdoi345678", + "uuid": "345e6789-f90b-34d5-d678-426614173456", + "repo_url": "https://github.com/commoncrawl/cc-crawl-data" + } + ], + // Array Based Metadata "general.tags": ["text generation", "transformer", "llama", "tiny", "tiny model"], - "general.languages": ["en"], - "general.datasets": ["https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-train.txt", "https://huggingface.co/datasets/roneneldan/TinyStories/blob/main/TinyStoriesV2-GPT4-valid.txt"] + "general.languages": ["en"] } ``` \ No newline at end of file