diff --git a/convert.py b/convert.py
index b0dd6628a..5abd77f05 100755
--- a/convert.py
+++ b/convert.py
@@ -516,7 +516,7 @@ class LlamaHfVocab(Vocab):
     tokenizer_model = "llama"
     name = "hfft"
 
-    def __init__(self, base_path: Path, ignore_nonllama: bool = False):
+    def __init__(self, base_path: Path):
         fname_tokenizer = base_path / FAST_TOKENIZER_FILE
         # if this fails, FileNotFoundError propagates to caller
         with open(fname_tokenizer, encoding='utf-8') as f:
@@ -524,9 +524,7 @@ class LlamaHfVocab(Vocab):
 
         # pre-check so we know if we need transformers
        tokenizer_model: dict[str, Any] = tokenizer_json['model']
-        if ignore_nonllama:
-            pass  # workaround incorrect use of this class for WordPiece
-        elif (
+        if (
             tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
             or tokenizer_json['decoder']['type'] != 'Sequence'
         ):