Saitomar committed on
Commit
668499f
1 Parent(s): 6c6ac01

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 123, "</s>": 124}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"/": 1, "0": 2, "1": 3, "2": 4, "3": 5, "4": 6, "5": 7, "8": 8, "9": 9, "_": 10, "a": 11, "b": 12, "c": 13, "d": 14, "e": 15, "f": 16, "g": 17, "h": 18, "i": 19, "j": 20, "k": 21, "l": 22, "m": 23, "n": 24, "o": 25, "p": 26, "r": 27, "s": 28, "t": 29, "u": 30, "v": 31, "w": 32, "x": 33, "y": 34, "z": 35, "“": 36, "”": 37, "œ": 38, "।": 39, "ঁ": 40, "ং": 41, "ঃ": 42, "অ": 43, "আ": 44, "ই": 45, "ঈ": 46, "উ": 47, "ঊ": 48, "ঋ": 49, "এ": 50, "ঐ": 51, "ও": 52, "ঔ": 53, "ক": 54, "খ": 55, "গ": 56, "ঘ": 57, "ঙ": 58, "চ": 59, "ছ": 60, "জ": 61, "ঝ": 62, "ঞ": 63, "ট": 64, "ঠ": 65, "ড": 66, "ঢ": 67, "ণ": 68, "ত": 69, "থ": 70, "দ": 71, "ধ": 72, "ন": 73, "প": 74, "ফ": 75, "ব": 76, "ভ": 77, "ম": 78, "য": 79, "র": 80, "ল": 81, "শ": 82, "ষ": 83, "স": 84, "হ": 85, "়": 86, "া": 87, "ি": 88, "ী": 89, "ু": 90, "ূ": 91, "ৃ": 92, "ে": 93, "ৈ": 94, "ো": 95, "ৌ": 96, "্": 97, "ৎ": 98, "ৗ": 99, "ড়": 100, "ঢ়": 101, "য়": 102, "০": 103, "১": 104, "২": 105, "৩": 106, "৪": 107, "৫": 108, "৬": 109, "৭": 110, "৮": 111, "৯": 112, "ৰ": 113, "‌": 114, "‍": 115, "‎": 116, "–": 117, "—": 118, "’": 119, "…": 120, "|": 0, "[UNK]": 121, "[PAD]": 122}