multi-head-xlm-xl-tokens-38 / tokenizer_config.json
SotirisLegkas's picture
Upload tokenizer
9386147 verified
raw
history blame
10.5 kB
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250001": {
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"250002": {
"content": "<NONE>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250003": {
"content": "<Self-direction: thought attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250004": {
"content": "<Self-direction: thought constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250005": {
"content": "<Self-direction: action attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250006": {
"content": "<Self-direction: action constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250007": {
"content": "<Stimulation attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250008": {
"content": "<Stimulation constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250009": {
"content": "<Hedonism attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250010": {
"content": "<Hedonism constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250011": {
"content": "<Achievement attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250012": {
"content": "<Achievement constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250013": {
"content": "<Power: dominance attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250014": {
"content": "<Power: dominance constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250015": {
"content": "<Power: resources attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250016": {
"content": "<Power: resources constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250017": {
"content": "<Face attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250018": {
"content": "<Face constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250019": {
"content": "<Security: personal attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250020": {
"content": "<Security: personal constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250021": {
"content": "<Security: societal attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250022": {
"content": "<Security: societal constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250023": {
"content": "<Tradition attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250024": {
"content": "<Tradition constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250025": {
"content": "<Conformity: rules attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250026": {
"content": "<Conformity: rules constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250027": {
"content": "<Conformity: interpersonal attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250028": {
"content": "<Conformity: interpersonal constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250029": {
"content": "<Humility attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250030": {
"content": "<Humility constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250031": {
"content": "<Benevolence: caring attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250032": {
"content": "<Benevolence: caring constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250033": {
"content": "<Benevolence: dependability attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250034": {
"content": "<Benevolence: dependability constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250035": {
"content": "<Universalism: concern attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250036": {
"content": "<Universalism: concern constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250037": {
"content": "<Universalism: nature attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250038": {
"content": "<Universalism: nature constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250039": {
"content": "<Universalism: tolerance attained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250040": {
"content": "<Universalism: tolerance constrained>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<NONE>",
"<Self-direction: thought attained>",
"<Self-direction: thought constrained>",
"<Self-direction: action attained>",
"<Self-direction: action constrained>",
"<Stimulation attained>",
"<Stimulation constrained>",
"<Hedonism attained>",
"<Hedonism constrained>",
"<Achievement attained>",
"<Achievement constrained>",
"<Power: dominance attained>",
"<Power: dominance constrained>",
"<Power: resources attained>",
"<Power: resources constrained>",
"<Face attained>",
"<Face constrained>",
"<Security: personal attained>",
"<Security: personal constrained>",
"<Security: societal attained>",
"<Security: societal constrained>",
"<Tradition attained>",
"<Tradition constrained>",
"<Conformity: rules attained>",
"<Conformity: rules constrained>",
"<Conformity: interpersonal attained>",
"<Conformity: interpersonal constrained>",
"<Humility attained>",
"<Humility constrained>",
"<Benevolence: caring attained>",
"<Benevolence: caring constrained>",
"<Benevolence: dependability attained>",
"<Benevolence: dependability constrained>",
"<Universalism: concern attained>",
"<Universalism: concern constrained>",
"<Universalism: nature attained>",
"<Universalism: nature constrained>",
"<Universalism: tolerance attained>",
"<Universalism: tolerance constrained>"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"cls_token": "<s>",
"eos_token": "</s>",
"mask_token": "<mask>",
"max_length": 512,
"model_max_length": 1000000000000000019884624838656,
"pad_to_multiple_of": null,
"pad_token": "<pad>",
"pad_token_type_id": 0,
"padding_side": "right",
"sep_token": "</s>",
"sp_model_kwargs": {},
"stride": 0,
"tokenizer_class": "XLMRobertaTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>"
}