diff --git a/Metadata-Override.md b/Metadata-Override.md index 4647322..7095dc6 100644 --- a/Metadata-Override.md +++ b/Metadata-Override.md @@ -83,70 +83,3 @@ https://github.com/ggerganov/llama.cpp/pull/7499 was added which adds `--metadat "general.languages": ["en"] } ``` - -As for how this may corresponds with Hugging Face style model cards... consider: - -```yaml - - - -# Model Card Fields -model_name: Example Model Six -# Licensing details -license: apache-2.0 -license_name: Apache License Version 2.0, January 2004 -license_link: https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md -# Simple Model (singular or list of hugging face model ids) -base_model: stabilityai/stable-diffusion-xl-base-1.0 -# Detailed Model Parents (Merges, Pre-tuning, etc...) (list of dicts) -base_model_sources: - - name: GPT-3 - author: OpenAI - version: '3.0' - organization: OpenAI - description: A large language model capable of performing a wide variety of language tasks. - url: 'https://openai.com/research/gpt-3' - doi: 10.5555/gpt3doi123456 - uuid: 123e4567-e89b-12d3-a456-426614174000 - repo_url: 'https://github.com/openai/gpt-3' - - name: BERT - author: Google AI Language - version: '1.0' - organization: Google - description: A transformer-based model pretrained on English to achieve state-of-the-art performance on a range of NLP tasks. - url: 'https://github.com/google-research/bert' - doi: 10.5555/bertdoi789012 - uuid: 987e6543-e21a-43f3-a356-527614173999 - repo_url: 'https://github.com/google-research/bert' -# Simple Dataset (singular or list of hugging face dataset ids) -datasets: common_voice -# Detailed Model Datasets Used (Training data...) (list of dicts) -dataset_sources: - - name: Wikipedia Corpus - author: Wikimedia Foundation - version: '2021-06' - organization: Wikimedia - description: A dataset comprising the full English Wikipedia, used to train models in a range of natural language tasks. - url: 'https://dumps.wikimedia.org/enwiki/' - doi: 10.5555/wikidoi234567 - uuid: 234e5678-f90a-12d3-c567-426614172345 - repo_url: 'https://github.com/wikimedia/wikipedia-corpus' - - name: Common Crawl - author: Common Crawl Foundation - version: '2021-04' - organization: Common Crawl - description: A dataset containing web-crawled data from various domains, providing a broad range of text. - url: 'https://commoncrawl.org' - doi: 10.5555/ccdoi345678 - uuid: 345e6789-f90b-34d5-d678-426614173456 - repo_url: 'https://github.com/commoncrawl/cc-crawl-data' -# Model Content Metadata -tags: - - text generation - - transformer - - llama - - tiny - - tiny model -language: - - en -``` \ No newline at end of file