{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "WORD_BOUNDARY", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "UTT_BOUNDARY", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Strip", "strip_left": true, "strip_right": true } ] }, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "UTT_BOUNDARY": { "id": "UTT_BOUNDARY", "ids": [ 3 ], "tokens": [ "UTT_BOUNDARY" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "UNK": 0, "PAD": 1, "WORD_BOUNDARY": 2, "UTT_BOUNDARY": 3, "d̠ʒ": 4, "ʌ": 5, "s": 6, "t": 7, "l": 8, "aɪ": 9, "k": 10, "j": 11, "ʊ": 12, "ɹ": 13, "b": 14, "æ": 15, "h": 16, "oʊ": 17, "m": 18, "iː": 19, "ð": 20, "ɛ": 21, "z": 22, "f": 23, "eɪ": 24, "w": 25, "ɪ": 26, "ɡ": 27, "ɑ": 28, "ə": 29, "p": 30, "uː": 31, "i": 32, "θ": 33, "ŋ": 34, "ɔ": 35, "ɔɪ": 36, "n": 37, "d": 38, "aʊ": 39, "v": 40, "ɜː": 41, "t̠ʃ": 42, "ʃ": 43, "iə": 44, "ʒ": 45, "x": 46, "tʰ": 47, "ɑː": 48, "ɒ": 49, "e": 50, "kʰ": 51, "ɔː": 52, "əʊ": 53, "ɪə": 54, "pʰ": 55, "ɐ": 56, "eə": 57, "ʊə": 58, "n̩": 59, "a": 60, "ɑ̃": 61, "ʁ": 62, "o": 63, "ɛ̃": 64, "y": 65, "ɔ̃": 66, "u": 67, "œ": 68, "ø": 69, "ɲ": 70, "aː": 71, "oː": 72, "øː": 73, "ɛː": 74, "yː": 75, "eː": 76, "d̺": 77, "t̺ʰ": 78, "ç": 79, "ʀ": 80, "ʏ": 81, "ts": 82, "pf": 83, "ɾ": 84, "e̞": 85, "o̞": 86, "β": 87, "ʝ": 88, "r": 89, "tl": 90, "ãː": 91, "ɦ": 92, "ɛi": 93, "ʋ": 94, "ɣ": 95, "ʌu": 96, "œy": 97, "tʲ": 98, "ã": 99, "au": 100, "ʃ̺": 101, "ɤ": 102, "t̠ʃ̺ʰ": 103, "ɕ": 104, "tɕ": 105, "t̠ʃ̺": 106, "ɹ̪̩": 107, "tɕʰ": 108, "ɻ": 109, "ɥ": 110, "tsʰ": 111, "ei": 112, "ou": 113, "ɻ̩": 114, "ai": 115, "kʲ": 116, "ɯ": 117, "ɯː": 118, "ɡʲ": 119, "ɸ": 120, "pʲ": 121, "ɾʲ": 122, "bʲ": 123, "mʲ": 124, "sʲ": 125, "æi": 126, "kː": 127, "tː": 128, "mː": 129, "sː": 130, "pː": 131, "æː": 132, "ɤː": 133, "lː": 134, "rː": 135, "nː": 136, "tʲː": 137, "øɪ̯": 138, "dʲ": 139, "sʲː": 140, "ʃː": 141, "fː": 142, "dː": 143, "yi": 144, "jː": 145, "t̪": 146, "d̪": 147, "t̪s": 148, "ʎ": 149, "q": 150, "oˤ": 151, "ɑˤː": 152, "eˤ": 153, "ɔˤ": 154, "uˤ": 155, "iˤ": 156, "ɒː": 157, "aˤ": 158, "ɜ": 159, "œː": 160, "ʔ": 161, "ai̯": 162, "s̪̻": 163, "ɟ": 164, "ei̯": 165, "t̺s̺": 166, "oi̯": 167, "s̺": 168, "t̪̻s̪̻": 169, "au̯": 170, "c": 171, "eu̯": 172, "l̪": 173, "s̻": 174, "z̻": 175, "t̪ː": 176, "n̪": 177, "t̻s̻": 178, "r̪": 179, "ɟʝ": 180, "s̻ː": 181, "z̻ː": 182, "l̪ː": 183, "ɟʝː": 184, "n̪ː": 185, "ɲː": 186, "r̪ː": 187, "t̠ʃː": 188, "bː": 189, "cç": 190, "t̻s̻ː": 191, "d̪ː": 192, "ɡː": 193, "d̻z̻": 194, "vː": 195, "cçː": 196, "hː": 197, "lʲ": 198, "l̪ˠ": 199, "z̪": 200, "s̪": 201, "a̟": 202, "t̪ʰ": 203, "ɢ": 204, "r̥": 205, "ä": 206, "θ̻": 207, "ɬ": 208, "ð̺̞": 209, "n̪̥": 210, "äu̯": 211, "ŋ̥": 212, "cʰ": 213, "ou̯": 214, "äi̯": 215, "ɰ": 216, "ʏː": 217, "ɪː": 218, "m̥": 219, "ɔi̯": 220, "ɲ̥": 221, "ɾ̪ʲ": 222, "d̪ˠ": 223, "n̪ˠ": 224, "ɛ̝": 225, "ɾ̪ˠ": 226, "mˠ": 227, "sˠ": 228, "bˠ": 229, "pˠʰ": 230, "t̪ʲʰ": 231, "ɔ̝": 232, "t̪ˠʰ": 233, "vˠ": 234, "fˠ": 235, "l̪ʲ": 236, "iːə": 237, "uːe": 238, "n̪ʲ": 239, "d̪ʲ": 240, "pʲʰ": 241, "ɐɪ": 242, "i̞": 243, "fʲ": 244, "χ": 245, "vʲ": 246, "ɔi": 247, "ʊi": 248, "əi": 249, "ɪu": 250, "ɛu": 251, "ɤ̞": 252, "dʑ": 253, "ɯi": 254, "t̠ʃʰ": 255, "ʉ̟": 256, "ʂ": 257, "ɵ": 258, "ɧ": 259, "o̞ː": 260, "ʉː": 261, "ʉ": 262, "ɒ̝": 263, "ø̞ː": 264, "øy": 265, "æʉ": 266, "ɔy": 267, "pʼ": 268, "tʼ": 269, "t̠ʃʼ": 270, "kʼ": 271, "qʼ": 272, "n̺": 273, "z̺": 274, "ɾ̺": 275, "r̺": 276, "u̯": 277, "ɫ̺": 278, "ɲ̟": 279, "ʎ̟": 280, "ts̺": 281, "ɐː": 282, "dz": 283, "d̠ʒː": 284, "tsː": 285, "dzː": 286, "ɐ̃": 287, "ɐ̃i": 288, "ɐ̃u̜": 289, "ũ": 290, "au̜": 291, "eu̜": 292, "ɐi": 293, "ɛu̜": 294, "ĩ": 295, "ũi": 296, "õ": 297, "õi": 298, "ẽ": 299, "oi": 300, "iu̜": 301, "ui": 302, "aʊ̯": 303, "oɪ̯": 304, "eʊ̯": 305, "ɐ̃ʊ̯̃": 306, "eɪ̯": 307, "ẽɪ̯̃": 308, "uɪ̯": 309, "iʊ̯": 310, "oʊ̯": 311, "aɪ̯": 312, "ɔɪ̯": 313, "ɛɪ̯": 314, "ɛʊ̯": 315, "ɪ̯": 316, "ɾ̪": 317, "t̠ʃʲ": 318, "e̯ä": 319, "ʃʲ": 320, "o̯ä": 321, "ɨ": 322, "uɪ": 323, "t̪s̪": 324, "əɪ": 325, "tsʲ": 326, "zʲ": 327, "iɪ": 328, "nʲ": 329, "eʊ": 330, "iʊ": 331, "eo": 332, "d̠ʒʲ": 333, "oɪ": 334, "t̪̻": 335, "ʒ̺": 336, "d̪̻": 337, "t̻ʃ̻": 338, "z̪̻": 339, "d̻ʒ̻": 340, "ʑ": 341 }, "unk_token": "UNK" } }