sma-language-pretrained / tokenizer_config.json
jxie's picture
Upload tokenizer
5443d8a verified
raw
history blame
1.43 kB
{
"bos_token": {
"__type": "AddedToken",
"content": "[BOS]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"cls_token": {
"__type": "AddedToken",
"content": "[CLS]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"__type": "AddedToken",
"content": "[EOS]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"mask_token": {
"__type": "AddedToken",
"content": "[MASK]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"model_max_length": 1024,
"name_or_path": "/iris/u/jwxie/workspace/releases/domain-agnostic-pretraining/examples/saved_models/language_pretrained/wikibooks_guided_self_random_select_masking_recon_small-adamw_torch-lr1e-4-wd0.01-mr0.15/checkpoint-1000000",
"pad_token": {
"__type": "AddedToken",
"content": "[PAD]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"sep_token": {
"__type": "AddedToken",
"content": "[SEP]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"special_tokens_map_file": "../saved_models/scratch_configs/language_guided_self_masking_recon_small_long/special_tokens_map.json",
"tokenizer_class": "SMATokenizer"
}