{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "[CLS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "[SEP]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "SpecialToken": {
          "id": "[CLS]",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "[SEP]",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "SpecialToken": {
          "id": "[CLS]",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "[SEP]",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "[CLS]",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "[SEP]",
          "type_id": 0
        }
      }
    ],
    "special_tokens": {
      "[CLS]": {
        "id": "[CLS]",
        "ids": [
          2
        ],
        "tokens": [
          "[CLS]"
        ]
      },
      "[SEP]": {
        "id": "[SEP]",
        "ids": [
          3
        ],
        "tokens": [
          "[SEP]"
        ]
      }
    }
  },
  "decoder": {
    "type": "BPEDecoder",
    "suffix": "</w>"
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": "[UNK]",
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "ignore_merges": false,
    "vocab": {
      "[PAD]": 0,
      "[UNK]": 1,
      "[CLS]": 2,
      "[SEP]": 3,
      "[MASK]": 4,
      "!": 5,
      "\"": 6,
      "%": 7,
      "&": 8,
      "'": 9,
      "(": 10,
      ")": 11,
      "*": 12,
      "+": 13,
      ",": 14,
      "-": 15,
      ".": 16,
      "/": 17,
      "0": 18,
      "1": 19,
      "2": 20,
      "3": 21,
      "4": 22,
      "5": 23,
      "6": 24,
      "7": 25,
      "8": 26,
      "9": 27,
      ":": 28,
      ";": 29,
      "?": 30,
      "A": 31,
      "B": 32,
      "C": 33,
      "D": 34,
      "E": 35,
      "F": 36,
      "G": 37,
      "H": 38,
      "I": 39,
      "J": 40,
      "K": 41,
      "L": 42,
      "M": 43,
      "N": 44,
      "O": 45,
      "P": 46,
      "Q": 47,
      "R": 48,
      "S": 49,
      "T": 50,
      "U": 51,
      "V": 52,
      "W": 53,
      "X": 54,
      "Y": 55,
      "Z": 56,
      "[": 57,
      "]": 58,
      "_": 59,
      "a": 60,
      "b": 61,
      "c": 62,
      "d": 63,
      "e": 64,
      "f": 65,
      "g": 66,
      "h": 67,
      "i": 68,
      "j": 69,
      "k": 70,
      "l": 71,
      "m": 72,
      "n": 73,
      "o": 74,
      "p": 75,
      "q": 76,
      "r": 77,
      "s": 78,
      "t": 79,
      "u": 80,
      "v": 81,
      "w": 82,
      "x": 83,
      "y": 84,
      "z": 85,
      "|": 86,
      "§": 87,
      "Á": 88,
      "Æ": 89,
      "á": 90,
      "æ": 91,
      "ç": 92,
      "è": 93,
      "é": 94,
      "í": 95,
      "ð": 96,
      "ö": 97,
      "ú": 98,
      "ü": 99,
      "þ": 100,
      "ā": 101,
      "ē": 102,
      "ŋ": 103,
      "ƿ": 104,
      "ɑ": 105,
      "ɒ": 106,
      "ɔ": 107,
      "ɖ": 108,
      "ə": 109,
      "ɚ": 110,
      "ɛ": 111,
      "ɜ": 112,
      "ɡ": 113,
      "ɪ": 114,
      "ɫ": 115,
      "ɹ": 116,
      "ɾ": 117,
      "ʃ": 118,
      "ʈ": 119,
      "ʊ": 120,
      "ʌ": 121,
      "ʍ": 122,
      "ʒ": 123,
      "ʔ": 124,
      "ʰ": 125,
      "ʱ": 126,
      "ʲ": 127,
      "ʷ": 128,
      "ˈ": 129,
      "ː": 130,
      "ˑ": 131,
      "̚": 132,
      "̥": 133,
      "̩": 134,
      "̪": 135,
      "̯": 136,
      "͡": 137,
      "θ": 138,
      "‑": 139,
      "–": 140,
      "—": 141,
      "∅": 142,
      "⟨": 143,
      "⟩": 144,
      "an": 145,
      "th": 146,
      "in": 147,
      "on": 148,
      "er": 149,
      "is": 150,
      "es": 151,
      "or": 152,
      "the": 153,
      "ti": 154,
      "ar": 155,
      "al": 156,
      "en": 157,
      "ed": 158,
      "of": 159,
      "and": 160,
      "gl": 161,
      "ish": 162,
      "ngl": 163,
      "Engl": 164,
      "English": 165,
      "as": 166,
      "ic": 167,
      "ou": 168,
      "20": 169,
      "tion": 170,
      "ing": 171,
      "ec": 172,
      "om": 173,
      "at": 174,
      "st": 175,
      "it": 176,
      "le": 177,
      "ge": 178,
      "re": 179,
      "gu": 180,
      "angu": 181,
      "angua": 182,
      "ch": 183,
      "ent": 184,
      "ve": 185,
      "to": 186,
      ").": 187,
      "ation": 188,
      "ri": 189,
      "ly": 190,
      "am": 191,
      "oun": 192,
      "ers": 193,
      "anguage": 194,
      "for": 195,
      "fr": 196,
      "ll": 197,
      "us": 198,
      "200": 199,
      "he": 200,
      "tic": 201,
      "pr": 202,
      "di": 203,
      "ow": 204,
      "et": 205,
      "ig": 206,
      "19": 207,
      "pe": 208,
      "ac": 209,
      ".[": 210,
      "ur": 211,
      "wi": 212,
      "201": 213,
      "ect": 214,
      "iv": 215,
      "ess": 216,
      "The": 217,
      "ol": 218,
      "ter": 219,
      "de": 220,
      "language": 221,
      "wor": 222,
      "from": 223,
      "un": 224,
      "In": 225,
      "ver": 226,
      "ir": 227,
      "are": 228,
      "cl": 229,
      "ther": 230,
      "ad": 231,
      "man": 232,
      "con": 233,
      "ab": 234,
      "ex": 235,
      "with": 236,
      "pp": 237,
      "wh": 238,
      "el": 239,
      "97": 240,
      "ary": 241,
      "10": 242,
      "su": 243,
      "ph": 244,
      "ul": 245,
      "po": 246,
      "978": 247,
      "ld": 248,
      "ak": 249,
      "si": 250,
      "ru": 251,
      "tive": 252,
      "ds": 253,
      "oc": 254,
      "enc": 255
    },
    "merges": [
      "a n",
      "t h",
      "i n",
      "o n",
      "e r",
      "i s",
      "e s",
      "o r",
      "th e",
      "t i",
      "a r",
      "a l",
      "e n",
      "e d",
      "o f",
      "an d",
      "g l",
      "is h",
      "n gl",
      "E ngl",
      "Engl ish",
      "a s",
      "i c",
      "o u",
      "2 0",
      "ti on",
      "in g",
      "e c",
      "o m",
      "a t",
      "s t",
      "i t",
      "l e",
      "g e",
      "r e",
      "g u",
      "an gu",
      "angu a",
      "c h",
      "en t",
      "v e",
      "t o",
      ") .",
      "a tion",
      "r i",
      "l y",
      "a m",
      "ou n",
      "er s",
      "angua ge",
      "f or",
      "f r",
      "l l",
      "u s",
      "20 0",
      "h e",
      "ti c",
      "p r",
      "d i",
      "o w",
      "e t",
      "i g",
      "1 9",
      "p e",
      "a c",
      ". [",
      "u r",
      "w i",
      "20 1",
      "ec t",
      "i v",
      "es s",
      "T he",
      "o l",
      "t er",
      "d e",
      "l anguage",
      "w or",
      "fr om",
      "u n",
      "I n",
      "v er",
      "i r",
      "ar e",
      "c l",
      "th er",
      "a d",
      "m an",
      "c on",
      "a b",
      "e x",
      "wi th",
      "p p",
      "w h",
      "e l",
      "9 7",
      "ar y",
      "1 0",
      "s u",
      "p h",
      "u l",
      "p o",
      "97 8",
      "l d",
      "a k",
      "s i",
      "r u",
      "ti ve",
      "d s",
      "o c",
      "en c"
    ]
  }
}