{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "!": 5, "\"": 6, "%": 7, "&": 8, "'": 9, "(": 10, ")": 11, "*": 12, "+": 13, ",": 14, "-": 15, ".": 16, "/": 17, "0": 18, "1": 19, "2": 20, "3": 21, "4": 22, "5": 23, "6": 24, "7": 25, "8": 26, "9": 27, ":": 28, ";": 29, "?": 30, "A": 31, "B": 32, "C": 33, "D": 34, "E": 35, "F": 36, "G": 37, "H": 38, "I": 39, "J": 40, "K": 41, "L": 42, "M": 43, "N": 44, "O": 45, "P": 46, "Q": 47, "R": 48, "S": 49, "T": 50, "U": 51, "V": 52, "W": 53, "X": 54, "Y": 55, "Z": 56, "[": 57, "]": 58, "_": 59, "a": 60, "b": 61, "c": 62, "d": 63, "e": 64, "f": 65, "g": 66, "h": 67, "i": 68, "j": 69, "k": 70, "l": 71, "m": 72, "n": 73, "o": 74, "p": 75, "q": 76, "r": 77, "s": 78, "t": 79, "u": 80, "v": 81, "w": 82, "x": 83, "y": 84, "z": 85, "|": 86, "§": 87, "Á": 88, "Æ": 89, "á": 90, "æ": 91, "ç": 92, "è": 93, "é": 94, "í": 95, "ð": 96, "ö": 97, "ú": 98, "ü": 99, "þ": 100, "ā": 101, "ē": 102, "ŋ": 103, "ƿ": 104, "ɑ": 105, "ɒ": 106, "ɔ": 107, "ɖ": 108, "ə": 109, "ɚ": 110, "ɛ": 111, "ɜ": 112, "ɡ": 113, "ɪ": 114, "ɫ": 115, "ɹ": 116, "ɾ": 117, "ʃ": 118, "ʈ": 119, "ʊ": 120, "ʌ": 121, "ʍ": 122, "ʒ": 123, "ʔ": 124, "ʰ": 125, "ʱ": 126, "ʲ": 127, "ʷ": 128, "ˈ": 129, "ː": 130, "ˑ": 131, "̚": 132, "̥": 133, "̩": 134, "̪": 135, "̯": 136, "͡": 137, "θ": 138, "‑": 139, "–": 140, "—": 141, "∅": 142, "⟨": 143, "⟩": 144, "an": 145, "th": 146, "in": 147, "on": 148, "er": 149, "is": 150, "es": 151, "or": 152, "the": 153, "ti": 154, "ar": 155, "al": 156, "en": 157, "ed": 158, "of": 159, "and": 160, "gl": 161, "ish": 162, "ngl": 163, "Engl": 164, "English": 165, "as": 166, "ic": 167, "ou": 168, "20": 169, "tion": 170, "ing": 171, "ec": 172, "om": 173, "at": 174, "st": 175, "it": 176, "le": 177, "ge": 178, "re": 179, "gu": 180, "angu": 181, "angua": 182, "ch": 183, "ent": 184, "ve": 185, "to": 186, ").": 187, "ation": 188, "ri": 189, "ly": 190, "am": 191, "oun": 192, "ers": 193, "anguage": 194, "for": 195, "fr": 196, "ll": 197, "us": 198, "200": 199, "he": 200, "tic": 201, "pr": 202, "di": 203, "ow": 204, "et": 205, "ig": 206, "19": 207, "pe": 208, "ac": 209, ".[": 210, "ur": 211, "wi": 212, "201": 213, "ect": 214, "iv": 215, "ess": 216, "The": 217, "ol": 218, "ter": 219, "de": 220, "language": 221, "wor": 222, "from": 223, "un": 224, "In": 225, "ver": 226, "ir": 227, "are": 228, "cl": 229, "ther": 230, "ad": 231, "man": 232, "con": 233, "ab": 234, "ex": 235, "with": 236, "pp": 237, "wh": 238, "el": 239, "97": 240, "ary": 241, "10": 242, "su": 243, "ph": 244, "ul": 245, "po": 246, "978": 247, "ld": 248, "ak": 249, "si": 250, "ru": 251, "tive": 252, "ds": 253, "oc": 254, "enc": 255 }, "merges": [ "a n", "t h", "i n", "o n", "e r", "i s", "e s", "o r", "th e", "t i", "a r", "a l", "e n", "e d", "o f", "an d", "g l", "is h", "n gl", "E ngl", "Engl ish", "a s", "i c", "o u", "2 0", "ti on", "in g", "e c", "o m", "a t", "s t", "i t", "l e", "g e", "r e", "g u", "an gu", "angu a", "c h", "en t", "v e", "t o", ") .", "a tion", "r i", "l y", "a m", "ou n", "er s", "angua ge", "f or", "f r", "l l", "u s", "20 0", "h e", "ti c", "p r", "d i", "o w", "e t", "i g", "1 9", "p e", "a c", ". [", "u r", "w i", "20 1", "ec t", "i v", "es s", "T he", "o l", "t er", "d e", "l anguage", "w or", "fr om", "u n", "I n", "v er", "i r", "ar e", "c l", "th er", "a d", "m an", "c on", "a b", "e x", "wi th", "p p", "w h", "e l", "9 7", "ar y", "1 0", "s u", "p h", "u l", "p o", "97 8", "l d", "a k", "s i", "r u", "ti ve", "d s", "o c", "en c" ] } }