AKulk committed on
Commit
5185e15
1 Parent(s): 1c4f545

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"zh": 0, "dcl": 1, "em": 2, "t": 3, "ih": 4, "g": 5, "bcl": 6, "ix": 7, "ao": 8, "eh": 9, "iy": 10, "k": 11, "aa": 12, "pau": 13, "uw": 14, "er": 15, "l": 16, "eng": 17, "ey": 18, "ux": 19, "hv": 20, "gcl": 21, "m": 22, "oy": 23, "epi": 24, "h#": 25, "q": 26, "b": 27, "s": 28, "ae": 29, "nx": 30, "jh": 31, "w": 32, "uh": 33, "v": 34, "axr": 35, "r": 36, "aw": 37, "el": 38, "dx": 39, "d": 40, "ax-h": 41, "ow": 42, "p": 43, "ng": 44, "kcl": 45, "ax": 46, "pcl": 47, "y": 48, "th": 49, "ah": 50, "ch": 51, "sh": 52, "f": 53, "z": 54, "dh": 55, "ay": 56, "n": 57, "hh": 58, "en": 59, "tcl": 60, "[UNK]": 61, "[PAD]": 62}