{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[BOS]": 0, "[EOS]": 1, "[PAD]": 2, "[UNK]": 3, "[MASK]": 4, "[CLS]": 5, "[SEP]": 6, "C": 7, "c": 8, "(": 9, ")": 10, "O": 11, "1": 12, "2": 13, "=": 14, "N": 15, "3": 16, "n": 17, "[C@H]": 18, "[C@@H]": 19, "4": 20, "F": 21, "[NH+]": 22, "S": 23, "o": 24, "Cl": 25, "s": 26, "[nH]": 27, "5": 28, "[NH2+]": 29, "#": 30, "/": 31, "Br": 32, "[C@@]": 33, "[C@]": 34, "[O-]": 35, "\\": 36, "[nH+]": 37, "[NH3+]": 38, "[n-]": 39, "6": 40, "I": 41, "[N-]": 42, "-": 43, "7": 44, "[H]": 45, "[Si]": 46, "P": 47, "[n+]": 48, "[S-]": 49, "[S@]": 50, "[S@@]": 51, "[N+]": 52, "8": 53, "B": 54, "[CH]": 55, "[C]": 56, "9": 57, "[P@@]": 58, "[P@]": 59, "[S+]": 60, "[N@@+]": 61, "[N@+]": 62, "[CH2]": 63, "[O]": 64, "[s+]": 65, "[Sn]": 66, "[P+]": 67, "[B-]": 68, "[S@@+]": 69, "[S@+]": 70, "p": 71, "[N]": 72, "%10": 73, "[C+]": 74, "[o+]": 75, "%11": 76, "[N@]": 77, "[P@@H]": 78, "[n@]": 79, "[C-]": 80, "[c+]": 81, "[IH2]": 82, "%13": 83, "[Si@@]": 84, "%12": 85, "[Si@]": 86, "[N@@]": 87, "[BH3-]": 88, "[P@H]": 89, "[CH-]": 90, "[Sn@]": 91, "[s@@]": 92, "[s@]": 93, "[P@+]": 94, "[P@@+]": 95, "[Sn@@]": 96, "[c-]": 97, "[17O]": 98, "[BH-]": 99, "[SnH4+2]": 100, "[B@-]": 101, "[B@@-]": 102, "[cH-]": 103, "[O+]": 104, "[SnH2+]": 105, "[SnH]": 106, "%14": 107, "[Sn+2]": 108, "[I+]": 109, "[P@@H+]": 110, "%15": 111, "%16": 112, "%18": 113, "[Br+]": 114, "[NH]": 115, "[Sn+]": 116, "[n@@]": 117, "%17": 118, "%19": 119, "%20": 120, "%21": 121, "%22": 122, "[18OH]": 123, "[BH2-]": 124, "[S@@-]": 125, "[S@@H]": 126, "[Sn+3]": 127, "[SnH2]": 128, "[SnH6+3]": 129, "[pH]": 130, "[S@H]": 131, "[SH3]": 132, "[SiH2]": 133, "[SiH3]": 134, "[Sn-]": 135, "[p+]": 136 }, "unk_token": "[UNK]" } }