{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "BOS", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "EOS", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 196, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "\n" }, "content": " UTT_BOUNDARY" }, { "type": "Strip", "strip_left": true, "strip_right": true } ] }, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "UNK": 0, "PAD": 1, "BOS": 2, "EOS": 3, "WORD_BOUNDARY": 4, "UTT_BOUNDARY": 5, "s": 6, "iː": 7, "ð": 8, "ɛ": 9, "ɹ": 10, "z": 11, "ʌ": 12, "f": 13, "eɪ": 14, "w": 15, "ɪ": 16, "ɡ": 17, "l": 18, "æ": 19, "ɑ": 20, "h": 21, "ə": 22, "ʊ": 23, "k": 24, "p": 25, "uː": 26, "b": 27, "i": 28, "t": 29, "aɪ": 30, "θ": 31, "ŋ": 32, "j": 33, "ɔ": 34, "m": 35, "ɔɪ": 36, "n": 37, "d": 38, "oʊ": 39, "aʊ": 40, "v": 41, "ɜː": 42, "t̠ʃ": 43, "d̠ʒ": 44, "ʃ": 45, "iə": 46, "ʒ": 47, "ɑ̃": 48, "r": 49, "x": 50, "nʲ": 51, "ɒ": 52, "a": 53, "ɑː": 54, "ɔː": 55, "əʊ": 56, "ɐ": 57, "eə": 58, "ʊə": 59, "n̩": 60, "aː": 61, "y": 62, "ɛ̃": 63, "ʁ": 64, "e": 65, "ɔ̃": 66, "u": 67, "o": 68, "œ̃": 69, "ø": 70, "œ": 71, "oː": 72, "yː": 73, "ɲ": 74, "ts": 75, "eː": 76, "ʀ": 77, "ç": 78, "ɛɪ": 79, "ʏ": 80, "ɛː": 81, "pf": 82, "øː": 83, "ã": 84, "ɾ": 85, "β": 86, "ʎ": 87, "ɣ": 88, "ʝ": 89, "oɪ": 90, "eʊ": 91, "pː": 92, "ɟ": 93, "ʋ": 94, "ɪː": 95, "ɵ": 96, "œy": 97, "tʲ": 98, "au̯": 99, "ʂ": 100, "ɤ": 101, "kʰ": 102, "ʈʂʰ": 103, "ɕ": 104, "pʰ": 105, "tɕ": 106, "ʈʂ": 107, "ɹ̩": 108, "tɕʰ": 109, "tʰ": 110, "ɻ": 111, "ɥ": 112, "tsʰ": 113, "ei̯": 114, "ou̯": 115, "ɻ̩": 116, "ai̯": 117, "ɯː": 118, "ɯ": 119, "pʲ": 120, "ɸ": 121, "rʲ": 122, "kʲ": 123, "bʲ": 124, "mʲ": 125, "kː": 126, "æi": 127, "yi": 128, "ɵː": 129, "tː": 130, "æː": 131, "dʑ": 132, "l̩": 133, "œː": 134, "ʌː": 135, "ɜ": 136, "ʔ": 137, "s̺": 138, "ts̻": 139, "s̻": 140, "c": 141, "ts̺": 142, "tsː": 143, "ɟː": 144, "t̠ʃː": 145, "ɡː": 146, "dː": 147, "cː": 148, "bː": 149, "dzː": 150, "ɫ": 151, "ʊː": 152, "q": 153, "øy": 154, "χ": 155, "i̯": 156, "t̪": 157, "d̪": 158, "lʲ": 159, "ɡʲ": 160, "hʲ": 161, "dʲ": 162, "çʲ": 163, "uə": 164, "ŭ": 165, "fʲ": 166, "aɨ": 167, "ɨ": 168, "uɨ": 169, "əɪ": 170, "ɨː": 171, "ɬ": 172, "əɨ": 173, "ɪu": 174, "ʉ": 175, "ʉː": 176, "ɑɪ": 177, "ʑ": 178, "dz": 179, "d̠ʒː": 180, "ɐ̃": 181, "ɛʊ": 182, "ũ": 183, "iʊ": 184, "õ": 185, "uɪ": 186, "sʲ": 187, "t̠ʃʲ": 188, "ɔa": 189, "ea": 190, "iɪ": 191, "tsʲ": 192, "eo": 193, "d̠ʒʲ": 194, "ɾʲ": 195 }, "unk_token": "UNK" } }