Merge pull request #211 from zoidbb/add-tokenizer-to-hf-downloads

download tokenizer when present
This commit is contained in:
oobabooga 2023-03-10 00:46:21 -03:00 committed by GitHub
commit 1d7e893fa1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -113,9 +113,10 @@ def get_download_links_from_huggingface(model, branch):
is_pytorch = re.match("pytorch_model.*\.bin", fname)
is_safetensors = re.match("model.*\.safetensors", fname)
is_text = re.match(".*\.(txt|json)", fname)
is_tokenizer = re.match("tokenizer.*\.model", fname)
is_text = re.match(".*\.(txt|json)", fname) or is_tokenizer
if is_text or is_safetensors or is_pytorch:
if any((is_pytorch, is_safetensors, is_text, is_tokenizer)):
if is_text:
links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}")
classifications.append('text')