from transformers import AutoTokenizer


def assert_tokenizer_consistency(model_id_1, model_id_2):
    """Raise if the two models' tokenizers have different vocabularies."""
    # Compare the full vocab dicts (token -> id); any mismatch means the
    # two tokenizers are not interchangeable.
    identical_tokenizers = (
        AutoTokenizer.from_pretrained(model_id_1).vocab
        == AutoTokenizer.from_pretrained(model_id_2).vocab
    )
    if not identical_tokenizers:
        # model_id_1/model_id_2 are plain strings, so interpolate them directly
        raise ValueError(f"Tokenizers are not identical for {model_id_1} and {model_id_2}.")
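

# Minimal usage sketch; the model ids below are illustrative placeholders,
# not necessarily the ones this Space loads. DistilGPT2 reuses GPT-2's
# tokenizer, so this pair is expected to pass the check.
if __name__ == "__main__":
    assert_tokenizer_consistency("gpt2", "distilgpt2")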