materials.selfies-ted2m / tokenizer.json
matthewpwilson-ibm's picture
Initial commit
d9bfa88
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
1
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
2
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"[UNK]": 0,
"[CLS]": 1,
"[SEP]": 2,
"[PAD]": 3,
"[MASK]": 4,
"[17O]": 5,
"[/S@@]": 6,
"[/N]": 7,
"[B@-1]": 8,
"[18OH1]": 9,
"[-/Ring2]": 10,
"[=17O]": 11,
"[/Br]": 12,
"[C@]": 13,
"[N@]": 14,
"[Branch1]": 15,
"[=Si]": 16,
"[=Branch2]": 17,
"[Cl].[Cl-1]": 18,
"[/C]": 19,
"[S].[Cl-1]": 20,
"[SH1]": 21,
"[Sn@]": 22,
"[#S]": 23,
"[NH1+1]": 24,
"[#Branch1]": 25,
"[#Branch1].[Cl-1]": 26,
"[BH2-1]": 27,
"[/NH1+1]": 28,
"[=P@]": 29,
"[I]": 30,
"[/S+1]": 31,
"[S-1]": 32,
"[=S@+1]": 33,
"[NH0]": 34,
"[SnH1]": 35,
"[F]": 36,
"[BH1-1]": 37,
"[=Ring2]": 38,
"[CH1]": 39,
"[=Ring1]": 40,
"[/S@]": 41,
"[N@+1]": 42,
"[Sn+2]": 43,
"[Sn+3]": 44,
"[#N+1]": 45,
"[/F]": 46,
"[Si]": 47,
"[/O]": 48,
"[Sn]": 49,
"[P]": 50,
"[Ring2]": 51,
"[#Branch2].[Cl-1]": 52,
"[=P@@]": 53,
"[PH1+1]": 54,
"[/Cl]": 55,
"[C@@]": 56,
"[Sn@@]": 57,
"[=S@]": 58,
"[=S]": 59,
"[C@H1]": 60,
"[Ring2].[Cl-1]": 61,
"[O-1].[Cl-1]": 62,
"[N-1]": 63,
"[S@@H1]": 64,
"[/O-1]": 65,
"[/NH1]": 66,
"[/C@@H1]": 67,
"[B-1]": 68,
"[OH0]": 69,
"[Ring1]": 70,
"[=O+1]": 71,
"[O+1]": 72,
"[=S@@+1]": 73,
"[=S+1]": 74,
"[/I]": 75,
"[Si@@]": 76,
"[=C]": 77,
"[P+1]": 78,
"[I].[Cl-1]": 79,
"[Branch1].[Cl-1]": 80,
"[P@@]": 81,
"[Branch1].[C]": 82,
"[Br+1]": 83,
"[=B]": 84,
"[CH1-1]": 85,
"[Sn+1]": 86,
"[Si-1]": 87,
"[/N+1]": 88,
"[N+1]": 89,
"[CH2]": 90,
"[SiH2]": 91,
"[/Sn]": 92,
"[=PH1]": 93,
"[=P+1]": 94,
"[S@@]": 95,
"[/C@H1]": 96,
"[N@@+1]": 97,
"[=S@@]": 98,
"[/NH2+1]": 99,
"[=N+1]": 100,
"[=C].[Cl-1]": 101,
"[=NH2+1]": 102,
"[=SH1]": 103,
"[/Si]": 104,
"[#N]": 105,
"[=N]": 106,
"[/OH0]": 107,
"[S]": 108,
"[Branch2].[Cl-1]": 109,
"[N@H1+1]": 110,
"[NH2+1]": 111,
"[Br]": 112,
"[O-1]": 113,
"[=Branch1]": 114,
"[CH0]": 115,
"[=Se]": 116,
"[#Branch2]": 117,
"[#C]": 118,
"[=Branch1].[Cl-1]": 119,
"[S@]": 120,
"[SiH1]": 121,
"[SiH3]": 122,
"[#C].[Cl-1]": 123,
"[#C-1]": 124,
"[S+1]": 125,
"[Cl]": 126,
"[#N].[Cl-1]": 127,
"[/C@@]": 128,
"[B@@-1]": 129,
"[F].[Cl-1]": 130,
"[Br].[Cl-1]": 131,
"[=Branch2].[Cl-1]": 132,
"[=P]": 133,
"[C].[Cl-1]": 134,
"[/B]": 135,
"[PH1]": 136,
"[C-1]": 137,
"[Si@@H1]": 138,
"[/P]": 139,
"[=O].[Cl-1]": 140,
"[O].[Cl-1]": 141,
"[N@@]": 142,
"[P].[Cl-1]": 143,
"[S@+1]": 144,
"[Branch2]": 145,
"[I+1]": 146,
"[-/Ring1]": 147,
"[/S]": 148,
"[P@]": 149,
"[P@@+1]": 150,
"[=Sn]": 151,
"[NH3+1]": 152,
"[=N-1]": 153,
"[=NH1+1]": 154,
"[NH1]": 155,
"[Si@]": 156,
"[N@@H1+1]": 157,
"[N]": 158,
"[Ring1].[Cl-1]": 159,
"[BH3-1]": 160,
"[B]": 161,
"[/C@]": 162,
"[C]": 163,
"[Si@H1]": 164,
"[O]": 165,
"[=NH0]": 166,
"[P@+1]": 167,
"[=N].[Cl-1]": 168,
"[/N-1]": 169,
"[S@@+1]": 170,
"[=O]": 171,
"[C@@H1]": 172
},
"unk_token": "[UNK]"
}
}