|
{ |
|
"version": "1.0", |
|
"truncation": { |
|
"direction": "Right", |
|
"max_length": 512, |
|
"strategy": "LongestFirst", |
|
"stride": 0 |
|
}, |
|
"padding": { |
|
"strategy": { |
|
"Fixed": 512 |
|
}, |
|
"direction": "Right", |
|
"pad_to_multiple_of": null, |
|
"pad_id": 0, |
|
"pad_type_id": 0, |
|
"pad_token": "[PAD]" |
|
}, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "[PAD]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 11, |
|
"content": "[UNK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 12, |
|
"content": "[CLS]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 13, |
|
"content": "[SEP]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 14, |
|
"content": "[MASK]", |
|
"single_word": false, |
|
"lstrip": true, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 591, |
|
"content": "<s>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 592, |
|
"content": "</s>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": false, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"post_processor": { |
|
"type": "RobertaProcessing", |
|
"sep": [ |
|
"[SEP]", |
|
13 |
|
], |
|
"cls": [ |
|
"[CLS]", |
|
12 |
|
], |
|
"trim_offsets": true, |
|
"add_prefix_space": false |
|
}, |
|
"decoder": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": true, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": null, |
|
"continuing_subword_prefix": "", |
|
"end_of_word_suffix": "", |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"ignore_merges": false, |
|
"vocab": { |
|
"[PAD]": 0, |
|
"[unused1]": 1, |
|
"[unused2]": 2, |
|
"[unused3]": 3, |
|
"[unused4]": 4, |
|
"[unused5]": 5, |
|
"[unused6]": 6, |
|
"[unused7]": 7, |
|
"[unused8]": 8, |
|
"[unused9]": 9, |
|
"[unused10]": 10, |
|
"[UNK]": 11, |
|
"[CLS]": 12, |
|
"[SEP]": 13, |
|
"[MASK]": 14, |
|
"c": 15, |
|
"C": 16, |
|
"(": 17, |
|
")": 18, |
|
"O": 19, |
|
"1": 20, |
|
"2": 21, |
|
"=": 22, |
|
"N": 23, |
|
".": 24, |
|
"n": 25, |
|
"3": 26, |
|
"F": 27, |
|
"Cl": 28, |
|
">>": 29, |
|
"~": 30, |
|
"-": 31, |
|
"4": 32, |
|
"[C@H]": 33, |
|
"S": 34, |
|
"[C@@H]": 35, |
|
"[O-]": 36, |
|
"Br": 37, |
|
"#": 38, |
|
"/": 39, |
|
"[nH]": 40, |
|
"[N+]": 41, |
|
"s": 42, |
|
"5": 43, |
|
"o": 44, |
|
"P": 45, |
|
"[Na+]": 46, |
|
"[Si]": 47, |
|
"I": 48, |
|
"[Na]": 49, |
|
"[Pd]": 50, |
|
"[K+]": 51, |
|
"[K]": 52, |
|
"[P]": 53, |
|
"B": 54, |
|
"[C@]": 55, |
|
"[C@@]": 56, |
|
"[Cl-]": 57, |
|
"6": 58, |
|
"[OH-]": 59, |
|
"\\": 60, |
|
"[N-]": 61, |
|
"[Li]": 62, |
|
"[H]": 63, |
|
"[2H]": 64, |
|
"[NH4+]": 65, |
|
"[c-]": 66, |
|
"[P-]": 67, |
|
"[Cs+]": 68, |
|
"[Li+]": 69, |
|
"[Cs]": 70, |
|
"[NaH]": 71, |
|
"[H-]": 72, |
|
"[O+]": 73, |
|
"[BH4-]": 74, |
|
"[Cu]": 75, |
|
"7": 76, |
|
"[Mg]": 77, |
|
"[Fe+2]": 78, |
|
"[n+]": 79, |
|
"[Sn]": 80, |
|
"[BH-]": 81, |
|
"[Pd+2]": 82, |
|
"[CH]": 83, |
|
"[I-]": 84, |
|
"[Br-]": 85, |
|
"[C-]": 86, |
|
"[Zn]": 87, |
|
"[B-]": 88, |
|
"[F-]": 89, |
|
"[Al]": 90, |
|
"[P+]": 91, |
|
"[BH3-]": 92, |
|
"[Fe]": 93, |
|
"[C]": 94, |
|
"[AlH4]": 95, |
|
"[Ni]": 96, |
|
"[SiH]": 97, |
|
"8": 98, |
|
"[Cu+2]": 99, |
|
"[Mn]": 100, |
|
"[AlH]": 101, |
|
"[nH+]": 102, |
|
"[AlH4-]": 103, |
|
"[O-2]": 104, |
|
"[Cr]": 105, |
|
"[Mg+2]": 106, |
|
"[NH3+]": 107, |
|
"[S@]": 108, |
|
"[Pt]": 109, |
|
"[Al+3]": 110, |
|
"[S@@]": 111, |
|
"[S-]": 112, |
|
"[Ti]": 113, |
|
"[Zn+2]": 114, |
|
"[PH]": 115, |
|
"[NH2+]": 116, |
|
"[Ru]": 117, |
|
"[Ag+]": 118, |
|
"[S+]": 119, |
|
"[I+3]": 120, |
|
"[NH+]": 121, |
|
"[Ca+2]": 122, |
|
"[Ag]": 123, |
|
"9": 124, |
|
"[Os]": 125, |
|
"[Se]": 126, |
|
"[SiH2]": 127, |
|
"[Ca]": 128, |
|
"[Ti+4]": 129, |
|
"[Ac]": 130, |
|
"[Cu+]": 131, |
|
"[S]": 132, |
|
"[Rh]": 133, |
|
"[Cl+3]": 134, |
|
"[cH-]": 135, |
|
"[Zn+]": 136, |
|
"[O]": 137, |
|
"[Cl+]": 138, |
|
"[SH]": 139, |
|
"[H+]": 140, |
|
"[Pd+]": 141, |
|
"[se]": 142, |
|
"[PH+]": 143, |
|
"[I]": 144, |
|
"[Pt+2]": 145, |
|
"[C+]": 146, |
|
"[Mg+]": 147, |
|
"[Hg]": 148, |
|
"[W]": 149, |
|
"[SnH]": 150, |
|
"[SiH3]": 151, |
|
"[Fe+3]": 152, |
|
"[NH]": 153, |
|
"[Mo]": 154, |
|
"[CH2+]": 155, |
|
"%10": 156, |
|
"[CH2-]": 157, |
|
"[CH2]": 158, |
|
"[n-]": 159, |
|
"[Ce+4]": 160, |
|
"[NH-]": 161, |
|
"[Co]": 162, |
|
"[I+]": 163, |
|
"[PH2]": 164, |
|
"[Pt+4]": 165, |
|
"[Ce]": 166, |
|
"[B]": 167, |
|
"[Sn+2]": 168, |
|
"[Ba+2]": 169, |
|
"%11": 170, |
|
"[Fe-3]": 171, |
|
"[18F]": 172, |
|
"[SH-]": 173, |
|
"[Pb+2]": 174, |
|
"[Os-2]": 175, |
|
"[Zr+4]": 176, |
|
"[N]": 177, |
|
"[Ir]": 178, |
|
"[Bi]": 179, |
|
"[Ni+2]": 180, |
|
"[P@]": 181, |
|
"[Co+2]": 182, |
|
"[s+]": 183, |
|
"[As]": 184, |
|
"[P+3]": 185, |
|
"[Hg+2]": 186, |
|
"[Yb+3]": 187, |
|
"[CH-]": 188, |
|
"[Zr+2]": 189, |
|
"[Mn+2]": 190, |
|
"[CH+]": 191, |
|
"[In]": 192, |
|
"[KH]": 193, |
|
"[Ce+3]": 194, |
|
"[Zr]": 195, |
|
"[AlH2-]": 196, |
|
"[OH2+]": 197, |
|
"[Ti+3]": 198, |
|
"[Rh+2]": 199, |
|
"[Sb]": 200, |
|
"[S-2]": 201, |
|
"%12": 202, |
|
"[P@@]": 203, |
|
"[Si@H]": 204, |
|
"[Mn+4]": 205, |
|
"p": 206, |
|
"[Ba]": 207, |
|
"[NH2-]": 208, |
|
"[Ge]": 209, |
|
"[Pb+4]": 210, |
|
"[Cr+3]": 211, |
|
"[Au]": 212, |
|
"[LiH]": 213, |
|
"[Sc+3]": 214, |
|
"[o+]": 215, |
|
"[Rh-3]": 216, |
|
"%13": 217, |
|
"[Br]": 218, |
|
"[Sb-]": 219, |
|
"[S@+]": 220, |
|
"[I+2]": 221, |
|
"[Ar]": 222, |
|
"[V]": 223, |
|
"[Cu-]": 224, |
|
"[Al-]": 225, |
|
"[Te]": 226, |
|
"[13c]": 227, |
|
"[13C]": 228, |
|
"[Cl]": 229, |
|
"[PH4+]": 230, |
|
"[SiH4]": 231, |
|
"[te]": 232, |
|
"[CH3-]": 233, |
|
"[S@@+]": 234, |
|
"[Rh+3]": 235, |
|
"[SH+]": 236, |
|
"[Bi+3]": 237, |
|
"[Br+2]": 238, |
|
"[La]": 239, |
|
"[La+3]": 240, |
|
"[Pt-2]": 241, |
|
"[N@@]": 242, |
|
"[PH3+]": 243, |
|
"[N@]": 244, |
|
"[Si+4]": 245, |
|
"[Sr+2]": 246, |
|
"[Al+]": 247, |
|
"[Pb]": 248, |
|
"[SeH]": 249, |
|
"[Si-]": 250, |
|
"[V+5]": 251, |
|
"[Y+3]": 252, |
|
"[Re]": 253, |
|
"[Ru+]": 254, |
|
"[Sm]": 255, |
|
"*": 256, |
|
"[3H]": 257, |
|
"[NH2]": 258, |
|
"[Ag-]": 259, |
|
"[13CH3]": 260, |
|
"[OH+]": 261, |
|
"[Ru+3]": 262, |
|
"[OH]": 263, |
|
"[Gd+3]": 264, |
|
"[13CH2]": 265, |
|
"[In+3]": 266, |
|
"[Si@@]": 267, |
|
"[Si@]": 268, |
|
"[Ti+2]": 269, |
|
"[Sn+]": 270, |
|
"[Cl+2]": 271, |
|
"[AlH-]": 272, |
|
"[Pd-2]": 273, |
|
"[SnH3]": 274, |
|
"[B+3]": 275, |
|
"[Cu-2]": 276, |
|
"[Nd+3]": 277, |
|
"[Pb+3]": 278, |
|
"[13cH]": 279, |
|
"[Fe-4]": 280, |
|
"[Ga]": 281, |
|
"[Sn+4]": 282, |
|
"[Hg+]": 283, |
|
"[11CH3]": 284, |
|
"[Hf]": 285, |
|
"[Pr]": 286, |
|
"[Y]": 287, |
|
"[S+2]": 288, |
|
"[Cd]": 289, |
|
"[Cr+6]": 290, |
|
"[Zr+3]": 291, |
|
"[Rh+]": 292, |
|
"[CH3]": 293, |
|
"[N-3]": 294, |
|
"[Hf+2]": 295, |
|
"[Th]": 296, |
|
"[Sb+3]": 297, |
|
"%14": 298, |
|
"[Cr+2]": 299, |
|
"[Ru+2]": 300, |
|
"[Hf+4]": 301, |
|
"[14C]": 302, |
|
"[Ta]": 303, |
|
"[Tl+]": 304, |
|
"[B+]": 305, |
|
"[Os+4]": 306, |
|
"[PdH2]": 307, |
|
"[Pd-]": 308, |
|
"[Cd+2]": 309, |
|
"[Co+3]": 310, |
|
"[S+4]": 311, |
|
"[Nb+5]": 312, |
|
"[123I]": 313, |
|
"[c+]": 314, |
|
"[Rb+]": 315, |
|
"[V+2]": 316, |
|
"[CH3+]": 317, |
|
"[Ag+2]": 318, |
|
"[cH+]": 319, |
|
"[Mn+3]": 320, |
|
"[Se-]": 321, |
|
"[As-]": 322, |
|
"[Eu+3]": 323, |
|
"[SH2]": 324, |
|
"[Sm+3]": 325, |
|
"[IH+]": 326, |
|
"%15": 327, |
|
"[OH3+]": 328, |
|
"[PH3]": 329, |
|
"[IH2+]": 330, |
|
"[SH2+]": 331, |
|
"[Ir+3]": 332, |
|
"[AlH3]": 333, |
|
"[Sc]": 334, |
|
"[Yb]": 335, |
|
"[15NH2]": 336, |
|
"[Lu]": 337, |
|
"[sH+]": 338, |
|
"[Gd]": 339, |
|
"[18F-]": 340, |
|
"[SH3+]": 341, |
|
"[SnH4]": 342, |
|
"[TeH]": 343, |
|
"[Si@@H]": 344, |
|
"[Ga+3]": 345, |
|
"[CaH2]": 346, |
|
"[Tl]": 347, |
|
"[Ta+5]": 348, |
|
"[GeH]": 349, |
|
"[Br+]": 350, |
|
"[Sr]": 351, |
|
"[Tl+3]": 352, |
|
"[Sm+2]": 353, |
|
"[PH5]": 354, |
|
"%16": 355, |
|
"[N@@+]": 356, |
|
"[Au+3]": 357, |
|
"[C-4]": 358, |
|
"[Nd]": 359, |
|
"[Ti+]": 360, |
|
"[IH]": 361, |
|
"[N@+]": 362, |
|
"[125I]": 363, |
|
"[Eu]": 364, |
|
"[Sn+3]": 365, |
|
"[Nb]": 366, |
|
"[Er+3]": 367, |
|
"[123I-]": 368, |
|
"[14c]": 369, |
|
"%17": 370, |
|
"[SnH2]": 371, |
|
"[YH]": 372, |
|
"[Sb+5]": 373, |
|
"[Pr+3]": 374, |
|
"[Ir+]": 375, |
|
"[N+3]": 376, |
|
"[AlH2]": 377, |
|
"[19F]": 378, |
|
"%18": 379, |
|
"[Tb]": 380, |
|
"[14CH]": 381, |
|
"[Mo+4]": 382, |
|
"[Si+]": 383, |
|
"[BH]": 384, |
|
"[Be]": 385, |
|
"[Rb]": 386, |
|
"[pH]": 387, |
|
"%19": 388, |
|
"%20": 389, |
|
"[Xe]": 390, |
|
"[Ir-]": 391, |
|
"[Be+2]": 392, |
|
"[C+4]": 393, |
|
"[RuH2]": 394, |
|
"[15NH]": 395, |
|
"[U+2]": 396, |
|
"[Au-]": 397, |
|
"%21": 398, |
|
"%22": 399, |
|
"[Au+]": 400, |
|
"[15n]": 401, |
|
"[Al+2]": 402, |
|
"[Tb+3]": 403, |
|
"[15N]": 404, |
|
"[V+3]": 405, |
|
"[W+6]": 406, |
|
"[14CH3]": 407, |
|
"[Cr+4]": 408, |
|
"[ClH+]": 409, |
|
"b": 410, |
|
"[Ti+6]": 411, |
|
"[Nd+]": 412, |
|
"[Zr+]": 413, |
|
"[PH2+]": 414, |
|
"[Fm]": 415, |
|
"[N@H+]": 416, |
|
"[RuH]": 417, |
|
"[Dy+3]": 418, |
|
"%23": 419, |
|
"[Hf+3]": 420, |
|
"[W+4]": 421, |
|
"[11C]": 422, |
|
"[13CH]": 423, |
|
"[Er]": 424, |
|
"[124I]": 425, |
|
"[LaH]": 426, |
|
"[F]": 427, |
|
"[siH]": 428, |
|
"[Ga+]": 429, |
|
"[Cm]": 430, |
|
"[GeH3]": 431, |
|
"[IH-]": 432, |
|
"[U+6]": 433, |
|
"[SeH+]": 434, |
|
"[32P]": 435, |
|
"[SeH-]": 436, |
|
"[Pt-]": 437, |
|
"[Ir+2]": 438, |
|
"[se+]": 439, |
|
"[U]": 440, |
|
"[F+]": 441, |
|
"[BH2]": 442, |
|
"[As+]": 443, |
|
"[Cf]": 444, |
|
"[ClH2+]": 445, |
|
"[Ni+]": 446, |
|
"[TeH3]": 447, |
|
"[SbH2]": 448, |
|
"[Ag+3]": 449, |
|
"%24": 450, |
|
"[18O]": 451, |
|
"[PH4]": 452, |
|
"[Os+2]": 453, |
|
"[Na-]": 454, |
|
"[Sb+2]": 455, |
|
"[V+4]": 456, |
|
"[Ho+3]": 457, |
|
"[68Ga]": 458, |
|
"[PH-]": 459, |
|
"[Bi+2]": 460, |
|
"[Ce+2]": 461, |
|
"[Pd+3]": 462, |
|
"[99Tc]": 463, |
|
"[13C@@H]": 464, |
|
"[Fe+6]": 465, |
|
"[c]": 466, |
|
"[GeH2]": 467, |
|
"[10B]": 468, |
|
"[Cu+3]": 469, |
|
"[Mo+2]": 470, |
|
"[Cr+]": 471, |
|
"[Pd+4]": 472, |
|
"[Dy]": 473, |
|
"[AsH]": 474, |
|
"[Ba+]": 475, |
|
"[SeH2]": 476, |
|
"[In+]": 477, |
|
"[TeH2]": 478, |
|
"[BrH+]": 479, |
|
"[14cH]": 480, |
|
"[W+]": 481, |
|
"[13C@H]": 482, |
|
"[AsH2]": 483, |
|
"[In+2]": 484, |
|
"[N+2]": 485, |
|
"[N@@H+]": 486, |
|
"[SbH]": 487, |
|
"[60Co]": 488, |
|
"[AsH4+]": 489, |
|
"[AsH3]": 490, |
|
"[18OH]": 491, |
|
"[Ru-2]": 492, |
|
"[Na-2]": 493, |
|
"[CuH2]": 494, |
|
"[31P]": 495, |
|
"[Ti+5]": 496, |
|
"[35S]": 497, |
|
"[P@@H]": 498, |
|
"[ArH]": 499, |
|
"[Co+]": 500, |
|
"[Zr-2]": 501, |
|
"[BH2-]": 502, |
|
"[131I]": 503, |
|
"[SH5]": 504, |
|
"[VH]": 505, |
|
"[B+2]": 506, |
|
"[Yb+2]": 507, |
|
"[14C@H]": 508, |
|
"[211At]": 509, |
|
"[NH3+2]": 510, |
|
"[IrH]": 511, |
|
"[IrH2]": 512, |
|
"[Rh-]": 513, |
|
"[Cr-]": 514, |
|
"[Sb+]": 515, |
|
"[Ni+3]": 516, |
|
"[TaH3]": 517, |
|
"[Tl+2]": 518, |
|
"[64Cu]": 519, |
|
"[Tc]": 520, |
|
"[Cd+]": 521, |
|
"[1H]": 522, |
|
"[15nH]": 523, |
|
"[AlH2+]": 524, |
|
"[FH+2]": 525, |
|
"[BiH3]": 526, |
|
"[Ru-]": 527, |
|
"[Mo+6]": 528, |
|
"[AsH+]": 529, |
|
"[BaH2]": 530, |
|
"[BaH]": 531, |
|
"[Fe+4]": 532, |
|
"[229Th]": 533, |
|
"[Th+4]": 534, |
|
"[As+3]": 535, |
|
"[NH+3]": 536, |
|
"[P@H]": 537, |
|
"[Li-]": 538, |
|
"[7NaH]": 539, |
|
"[Bi+]": 540, |
|
"[PtH+2]": 541, |
|
"[p-]": 542, |
|
"[Re+5]": 543, |
|
"[NiH]": 544, |
|
"[Ni-]": 545, |
|
"[Xe+]": 546, |
|
"[Ca+]": 547, |
|
"[11c]": 548, |
|
"[Rh+4]": 549, |
|
"[AcH]": 550, |
|
"[HeH]": 551, |
|
"[Sc+2]": 552, |
|
"[Mn+]": 553, |
|
"[UH]": 554, |
|
"[14CH2]": 555, |
|
"[SiH4+]": 556, |
|
"[18OH2]": 557, |
|
"[Ac-]": 558, |
|
"[Re+4]": 559, |
|
"[118Sn]": 560, |
|
"[153Sm]": 561, |
|
"[P+2]": 562, |
|
"[9CH]": 563, |
|
"[9CH3]": 564, |
|
"[Y-]": 565, |
|
"[NiH2]": 566, |
|
"[Si+2]": 567, |
|
"[Mn+6]": 568, |
|
"[ZrH2]": 569, |
|
"[C-2]": 570, |
|
"[Bi+5]": 571, |
|
"[24NaH]": 572, |
|
"[Fr]": 573, |
|
"[15CH]": 574, |
|
"[Se+]": 575, |
|
"[At]": 576, |
|
"[P-3]": 577, |
|
"[124I-]": 578, |
|
"[CuH2-]": 579, |
|
"[Nb+4]": 580, |
|
"[Nb+3]": 581, |
|
"[MgH]": 582, |
|
"[Ir+4]": 583, |
|
"[67Ga+3]": 584, |
|
"[67Ga]": 585, |
|
"[13N]": 586, |
|
"[15OH2]": 587, |
|
"[2NH]": 588, |
|
"[Ho]": 589, |
|
"[Cn]": 590 |
|
}, |
|
"merges": [] |
|
} |
|
} |