convert : fix TypeError on GPT-2 vocab.json (#5288)

This commit is contained in:
Sang-Kil Park 2024-02-07 13:28:00 +09:00 committed by GitHub
parent 213d1439fa
commit f68664ac24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -334,9 +334,9 @@ class Params:
 class BpeVocab:
     def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
         self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
-        try:
+        if isinstance(self.bpe_tokenizer.get('model'), dict):
             self.vocab = self.bpe_tokenizer["model"]["vocab"]
-        except KeyError:
+        else:
             self.vocab = self.bpe_tokenizer
         added_tokens: dict[str, int]
         if fname_added_tokens is not None: