{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "BOS", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "EOS", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "\n" }, "content": " UTT_BOUNDARY" }, { "type": "Strip", "strip_left": true, "strip_right": true } ] }, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "UNK": 0, "PAD": 1, "BOS": 2, "EOS": 3, "WORD_BOUNDARY": 4, "UTT_BOUNDARY": 5, "s": 6, "iː": 7, "ð": 8, "ɛ": 9, "ɹ": 10, "z": 11, "ʌ": 12, "f": 13, "eɪ": 14, "w": 15, "ɪ": 16, "ɡ": 17, "l": 18, "æ": 19, "ɑ": 20, "h": 21, "ə": 22, "ʊ": 23, "k": 24, "p": 25, "uː": 26, "b": 27, "i": 28, "t": 29, "aɪ": 30, "θ": 31, "ŋ": 32, "j": 33, "ɔ": 34, "m": 35, "ɔɪ": 36, "n": 37, "d": 38, "oʊ": 39, "aʊ": 40, "v": 41, "ɜː": 42, "t̠ʃ": 43, "d̠ʒ": 44, "ʃ": 45, "iə": 46, "ʒ": 47, "ɑ̃": 48, "r": 49, "x": 50, "y": 51, "ɛ̃": 52, "a": 53, "ʁ": 54, "e": 55, "ɔ̃": 56, "u": 57, "o": 58, "œ̃": 59, "ø": 60, "œ": 61, "oː": 62, "yː": 63, "ɲ": 64, "aː": 65, "ts": 66, "eː": 67, "ʀ": 68, "ç": 69, "ɐ": 70, "ɛɪ": 71, "ʏ": 72, "ɛː": 73, "pf": 74, "øː": 75, "ã": 76, "ɔː": 77, "ɾ": 78, "β": 79, "ʎ": 80, "ɣ": 81, "ʝ": 82, "oɪ": 83, "eʊ": 84, "pː": 85, "ɟ": 86, "ʋ": 87, "ɪː": 88, "ɵ": 89, "œy": 90, "tʲ": 91, "au̯": 92, "˥˩": 93, "ʂ": 94, "ɻ̩": 95, "˧˥": 96, "ɤ": 97, "kʰ": 98, "˥": 99, "ʈʂʰ": 100, "ɕ": 101, "ei̯": 102, "pʰ": 103, "ai̯": 104, "ou̯": 105, "tɕ": 106, "ʈʂ": 107, "ɹ̩": 108, "tɕʰ": 109, "tʰ": 110, "ɻ": 111, "ɥ": 112, "tsʰ": 113, "ɚ": 114, "ɯː": 115, "ɯ": 116, "pʲ": 117, "ɸ": 118, "rʲ": 119, "kʲ": 120, "bʲ": 121, "mʲ": 122, "˧": 123, "˨˩": 124, "ei": 125, "˩˧": 126, "˨": 127, "ɐi": 128, "m̩": 129, "ou": 130, "aːĭ": 131, "ɵy": 132, "ɔːĭ": 133, "ɐu": 134, "iːŭ": 135, "aːŭ": 136, "œː": 137, "uːĭ": 138, "kː": 139, "æi": 140, "yi": 141, "ɵː": 142, "tː": 143, "æː": 144, "dʑ": 145, "l̩": 146, "ɒ": 147, "ʌː": 148, "ɜ": 149, "ʔ": 150, "s̺": 151, "ts̻": 152, "s̻": 153, "c": 154, "ts̺": 155, "tsː": 156, "ɟː": 157, "t̠ʃː": 158, "ɡː": 159, "ɑː": 160, "dː": 161, "cː": 162, "bː": 163, "ɫ": 164, "ʊː": 165, "q": 166, "øy": 167, "χ": 168 }, "unk_token": "UNK" } }