from transformers import AutoTokenizer


def get_special_token_mapping(tokenizer: AutoTokenizer):
    """Obtain the ids of the special tokens used by the prompt template."""
    if "t5" in type(tokenizer).__name__.lower():
        # T5 does not define CLS/MASK/SEP tokens, so fixed ids stand in for them:
        # 32099 is the first sentinel token (<extra_id_0>) in the standard T5 vocabulary,
        # used in place of [MASK], and the EOS token doubles as the separator.
        special_token_mapping = {
            "cls": 3,
            "mask": 32099,
            "sep": tokenizer.eos_token_id,
            "sep+": tokenizer.eos_token_id,
            "pseudo_token": tokenizer.unk_token_id,
        }
    else:
        # BERT-style tokenizers expose these special tokens directly.
        special_token_mapping = {
            "cls": tokenizer.cls_token_id,
            "mask": tokenizer.mask_token_id,
            "sep": tokenizer.sep_token_id,
            "sep+": tokenizer.sep_token_id,
            "pseudo_token": tokenizer.unk_token_id,
        }
    return special_token_mapping
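

# Minimal usage sketch (not part of the original file): it assumes the
# "t5-small" and "bert-base-uncased" checkpoints are available locally or
# can be downloaded, and simply prints the mapping for each tokenizer type.
if __name__ == "__main__":
    t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
    print(get_special_token_mapping(t5_tokenizer))

    bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    print(get_special_token_mapping(bert_tokenizer))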