diff --git a/HuggingFace-Model-Card-Metadata-Interoperability-Consideration.md b/HuggingFace-Model-Card-Metadata-Interoperability-Consideration.md index 0e94373..5fa42b3 100644 --- a/HuggingFace-Model-Card-Metadata-Interoperability-Consideration.md +++ b/HuggingFace-Model-Card-Metadata-Interoperability-Consideration.md @@ -37,3 +37,67 @@ Below is the agreed upon mapping between GGUF KV Keys and Hugging face as per [D | `general.quantized_by` | *Not explicitly mapped for now* | Indicates who performed quantization. | | `general.alignment` | *Not explicitly mapped for now* | Potentially indicates alignment objective (e.g., RLHF, etc.). | | `general.file_type` | *Not explicitly mapped for now* | File format of the model (e.g., GGUF, Safetensors). | + +Below is an example of how the mapping shown above may appear: + +```yaml +# Model Card Fields +model_name: Example Model Six +# Licensing details +license: apache-2.0 +license_name: Apache License Version 2.0, January 2004 +license_link: https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md +# Simple Model (single ID or list of Hugging Face model IDs) +base_model: stabilityai/stable-diffusion-xl-base-1.0 +# Detailed Model Parents (Merges, Pre-tuning, etc.) (list of dicts) +base_model_sources: + - name: GPT-3 + author: OpenAI + version: '3.0' + organization: OpenAI + description: A large language model capable of performing a wide variety of language tasks. + url: 'https://openai.com/research/gpt-3' + doi: 10.5555/gpt3doi123456 + uuid: 123e4567-e89b-12d3-a456-426614174000 + repo_url: 'https://github.com/openai/gpt-3' + - name: BERT + author: Google AI Language + version: '1.0' + organization: Google + description: A transformer-based model pretrained on English to achieve state-of-the-art performance on a range of NLP tasks. 
+ url: 'https://github.com/google-research/bert' + doi: 10.5555/bertdoi789012 + uuid: 987e6543-e21a-43f3-a356-527614173999 + repo_url: 'https://github.com/google-research/bert' +# Simple Dataset (single ID or list of Hugging Face dataset IDs) +datasets: common_voice +# Detailed Model Datasets Used (Training data...) (list of dicts) +dataset_sources: + - name: Wikipedia Corpus + author: Wikimedia Foundation + version: '2021-06' + organization: Wikimedia + description: A dataset comprising the full English Wikipedia, used to train models in a range of natural language tasks. + url: 'https://dumps.wikimedia.org/enwiki/' + doi: 10.5555/wikidoi234567 + uuid: 234e5678-f90a-12d3-c567-426614172345 + repo_url: 'https://github.com/wikimedia/wikipedia-corpus' + - name: Common Crawl + author: Common Crawl Foundation + version: '2021-04' + organization: Common Crawl + description: A dataset containing web-crawled data from various domains, providing a broad range of text. + url: 'https://commoncrawl.org' + doi: 10.5555/ccdoi345678 + uuid: 345e6789-f90b-34d5-d678-426614173456 + repo_url: 'https://github.com/commoncrawl/cc-crawl-data' +# Model Content Metadata +tags: + - text generation + - transformer + - llama + - tiny + - tiny model +language: + - en +``` \ No newline at end of file