{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "UNK",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "PAD",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 5,
      "content": "UTT_BOUNDARY",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Replace",
        "pattern": {
          "String": "\n"
        },
        "content": " UTT_BOUNDARY"
      },
      {
        "type": "Strip",
        "strip_left": true,
        "strip_right": true
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "type": "WordLevel",
    "vocab": {
      "UNK": 0,
      "PAD": 1,
      "BOS": 2,
      "EOS": 3,
      "WORD_BOUNDARY": 4,
      "UTT_BOUNDARY": 5,
      "s": 6,
      "æ": 7,
      "n": 8,
      "o": 9,
      "j": 10,
      "ʊ": 11,
      "ɔ": 12,
      "a": 13,
      "r": 14,
      "m": 15,
      "ɯ": 16,
      "k": 17,
      "ɪ": 18,
      "l": 19,
      "i": 20,
      "ɛ": 21,
      "v": 22,
      "d": 23,
      "d̠ʒ": 24,
      "y": 25,
      "t": 26,
      "b": 27,
      "u": 28,
      "z": 29,
      "ʃ": 30,
      "ɟ": 31,
      "e": 32,
      "p": 33,
      "ɡ": 34,
      "ɫ": 35,
      "h": 36,
      "t̠ʃ": 37,
      "ɾ": 38,
      "f": 39,
      "ø": 40,
      "œ": 41,
      "aː": 42,
      "c": 43,
      "ʊː": 44,
      "tː": 45,
      "oː": 46,
      "œː": 47,
      "ʒ": 48
    },
    "unk_token": "UNK"
  }
}