codebyzeb committed on
Commit
c172f9e
1 Parent(s): 871e003

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +42 -1
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -105,7 +105,48 @@
105
  "UNK": 0,
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
- "UTT_BOUNDARY": 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  "unk_token": "UNK"
111
  }
 
105
  "UNK": 0,
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
+ "UTT_BOUNDARY": 3,
109
+ "d": 4,
110
+ "e": 5,
111
+ "ɛ": 6,
112
+ "n": 7,
113
+ "m": 8,
114
+ "s": 9,
115
+ "t": 10,
116
+ "k": 11,
117
+ "j": 12,
118
+ "f": 13,
119
+ "ɑ": 14,
120
+ "ɒ": 15,
121
+ "ə": 16,
122
+ "ʋ": 17,
123
+ "a": 18,
124
+ "l": 19,
125
+ "h": 20,
126
+ "b": 21,
127
+ "ʁ": 22,
128
+ "p": 23,
129
+ "œ": 24,
130
+ "i": 25,
131
+ "ɡ": 26,
132
+ "ʌ": 27,
133
+ "u": 28,
134
+ "ʃ": 29,
135
+ "ɔ": 30,
136
+ "w": 31,
137
+ "ð": 32,
138
+ "o": 33,
139
+ "y": 34,
140
+ "ŋ": 35,
141
+ "aɪ": 36,
142
+ "œː": 37,
143
+ "aː": 38,
144
+ "d̠ʒ": 39,
145
+ "uː": 40,
146
+ "ʌː": 41,
147
+ "ɜ": 42,
148
+ "oː": 43,
149
+ "yː": 44
150
  },
151
  "unk_token": "UNK"
152
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"d":4,"e":5,"ɛ":6,"n":7,"m":8,"s":9,"t":10,"k":11,"j":12,"f":13,"ɑ":14,"ɒ":15,"ə":16,"ʋ":17,"a":18,"l":19,"h":20,"b":21,"ʁ":22,"p":23,"œ":24,"i":25,"ɡ":26,"ʌ":27,"u":28,"ʃ":29,"ɔ":30,"w":31,"ð":32,"o":33,"y":34,"ŋ":35,"aɪ":36,"œː":37,"aː":38,"d̠ʒ":39,"uː":40,"ʌː":41,"ɜ":42,"oː":43,"yː":44}