from transformers import AutoTokenizer


def assert_tokenizer_consistency(model_id_1, model_id_2):
    # The two models can only be compared token-by-token if their tokenizers
    # produce the same vocabulary, so compare the vocab mappings directly.
    identical_tokenizers = (
        AutoTokenizer.from_pretrained(model_id_1).vocab
        == AutoTokenizer.from_pretrained(model_id_2).vocab
    )
    if not identical_tokenizers:
        raise ValueError(f"Tokenizers are not identical for {model_id_1} and {model_id_2}.")