diff --git a/Metadata-Override.md b/Metadata-Override.md index 4a028df..91a55d4 100644 --- a/Metadata-Override.md +++ b/Metadata-Override.md @@ -82,4 +82,89 @@ https://github.com/ggerganov/llama.cpp/pull/7499 was added which adds `--metadat "general.tags": ["text generation", "transformer", "llama", "tiny", "tiny model"], "general.languages": ["en"] } +``` + +As for how this may correspond with Hugging Face style model cards... consider: + +```yaml +# Model Card Fields +model_name: Example Model Six +model_author: John Smith +model_version: v1.0 +model_organization: SparkExampleMind +quantized_by: Abbety Jenson +model_description: This is an example of a model +# Useful for cleanly regenerating default naming conventions +model_finetune: instruct +model_basename: llamabase +model_size_label: 8x2.3Q +# Licensing details +license: apache-2.0 +license_name: 'Apache License Version 2.0, January 2004' +license_link: 'https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md' +# Typically represents the converted GGUF repo (Unless native) +model_url: 'https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-F16/blob/main/README.md' +model_doi: 'doi:10.1080/02626667.2018.1560449' +model_uuid: f18383df-ceb9-4ef3-b929-77e4dc64787c +model_repo_url: 'https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-F16' +# Model Source If Conversion +source_model_url: 'https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-safetensor/blob/main/README.md' +source_model_doi: 'doi:10.1080/02626667.2018.1560449' +source_model_uuid: 'a72998bf-3b84-4ff4-91c6-7a6b780507bc' +source_model_repo_url: 'https://huggingface.co/SparkExampleMind/llamabase-8x2.3Q-instruct-v1.0-safetensor' +# Model Parents (Merges, Pre-tuning, etc...) +base_model_sources: + - name: GPT-3 + author: OpenAI + version: '3.0' + organization: OpenAI + description: >- + A large language model capable of performing a wide variety of language + tasks.
+ url: 'https://openai.com/research/gpt-3' + doi: 10.5555/gpt3doi123456 + uuid: 123e4567-e89b-12d3-a456-426614174000 + repo_url: 'https://github.com/openai/gpt-3' + - name: BERT + author: Google AI Language + version: '1.0' + organization: Google + description: >- + A transformer-based model pretrained on English to achieve + state-of-the-art performance on a range of NLP tasks. + url: 'https://github.com/google-research/bert' + doi: 10.5555/bertdoi789012 + uuid: 987e6543-e21a-43f3-a356-527614173999 + repo_url: 'https://github.com/google-research/bert' +# Model Datasets Used (Training data...) +dataset_sources: + - name: Wikipedia Corpus + author: Wikimedia Foundation + version: 2021-06 + organization: Wikimedia + description: A dataset comprising the full English Wikipedia, used to train models in a range of natural language tasks. + url: 'https://dumps.wikimedia.org/enwiki/' + doi: 10.5555/wikidoi234567 + uuid: 234e5678-f90a-12d3-c567-426614172345 + repo_url: 'https://github.com/wikimedia/wikipedia-corpus' + - name: Common Crawl + author: Common Crawl Foundation + version: 2021-04 + organization: Common Crawl + description: A dataset containing web-crawled data from various domains, providing a broad range of text. + url: 'https://commoncrawl.org' + doi: 10.5555/ccdoi345678 + uuid: 345e6789-f90b-34d5-d678-426614173456 + repo_url: 'https://github.com/commoncrawl/cc-crawl-data' +# Model Content Metadata +tags: + - text generation + - transformer + - llama + - tiny + - tiny model +pipeline_tag: + - text-classification +languages: + - en ``` \ No newline at end of file