selfies-bart / tokenizer.json
maykcaldas's picture
add tokenizer
fb57838
raw
history blame
2.01 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "[unk]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[As+1]": 0,
"[=SH0]": 1,
"[=SH1]": 2,
"[=Ring2]": 3,
"[=Ring1]": 4,
"[CH1]": 5,
"[S]": 6,
"[NH2+1]": 7,
"[B]": 8,
"[C-1]": 9,
"[#C]": 10,
"[=P]": 11,
"[As]": 12,
"[B-1]": 13,
"[bos]": 14,
"[O]": 15,
"[OH0]": 16,
"[I]": 17,
"[nop]": 18,
"[Cl]": 19,
"[SiH2]": 20,
"[Ring1]": 21,
"[Fe-4]": 22,
"[CH0]": 23,
"[Fe]": 24,
"[Fe+2]": 25,
"[CH1-1]": 26,
"[=Branch3]": 27,
"[#Branch1]": 28,
"[=Branch2]": 29,
"[NH0]": 30,
"[N-1]": 31,
"[C]": 32,
"[=NH2+1]": 33,
"[NH1-1]": 34,
"[#N+1]": 35,
"[SeH1]": 36,
"[Branch3]": 37,
"[SH1]": 38,
"[CH2-1]": 39,
"[SH0]": 40,
"[=Se]": 41,
"[NH1+1]": 42,
"[K]": 43,
"[Ring2]": 44,
"[#N]": 45,
"[O-1]": 46,
"[OH1+1]": 47,
"[#Branch2]": 48,
"[=C]": 49,
"[I+1]": 50,
"[Si]": 51,
"[F]": 52,
"[=N+1]": 53,
"[=OH1+1]": 54,
"[Branch2]": 55,
"[=O+1]": 56,
"[#S]": 57,
"[Na]": 58,
"[C+1]": 59,
"[=B]": 60,
"[S+1]": 61,
"[unk]": 62,
"[=Fe]": 63,
"[P]": 64,
"[=N]": 65,
"[SiH1]": 66,
"[NH3+1]": 67,
"[Fe-3]": 68,
"[CH1+1]": 69,
"[Branch1]": 70,
"[Fe+1]": 71,
"[=Branch1]": 72,
"[=S]": 73,
"[Se]": 74,
"[N]": 75,
"[=As]": 76,
"[#Ring2]": 77,
"[Br]": 78,
"[=O]": 79,
"[P+1]": 80,
"[N+1]": 81,
"[eos]": 82,
"[Se+1]": 83
}
}
}