codebyzeb committed on
Commit
2a1c1fa
1 Parent(s): 05288fc

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +46 -61
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -115,68 +115,53 @@
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
- "d": 4,
119
- "": 5,
120
- "j": 6,
121
- "w": 7,
122
- "ɔ": 8,
123
- "n": 9,
124
- "t": 10,
125
- "ə": 11,
126
- "l": 12,
127
- "ʊ": 13,
128
- "k": 14,
129
- "æ": 15,
130
- "ð": 16,
131
- "ʌ": 17,
132
- "ɪ": 18,
133
- "s": 19,
134
- "ɛ": 20,
135
- "z": 21,
136
- "": 22,
137
- "ɹ": 23,
138
- "f": 24,
139
- "": 25,
140
- "ɡ": 26,
141
- "ɑ": 27,
142
- "h": 28,
143
- "p": 29,
144
- "b": 30,
145
- "i": 31,
146
- "t̠ʃ": 32,
147
- "": 33,
148
- "θ": 34,
149
- "ŋ": 35,
150
- "m": 36,
151
- "ɔɪ": 37,
152
- "": 38,
153
- "": 39,
154
- "v": 40,
155
- "ɜː": 41,
156
- "d̠ʒ": 42,
157
- "ʃ": 43,
158
- "": 44,
159
  "ʒ": 45,
160
- "ɑ̃": 46,
161
- "r": 47,
162
- "": 48,
163
- "x": 49,
164
- "ɬ": 50,
165
- "ç": 51,
166
- "e": 52,
167
- "o": 53,
168
- "ɛː": 54,
169
- "ɪː": 55,
170
- "u": 56,
171
- "q": 57,
172
- "tɕ": 58,
173
- "tʰ": 59,
174
- "ɯ": 60,
175
- "r̩": 61,
176
- "əʊ": 62,
177
- "a": 63,
178
- "ɒ": 64,
179
- "eə": 65
180
  },
181
  "unk_token": "UNK"
182
  }
 
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
+ "j": 4,
119
+ "ɛ": 5,
120
+ "h": 6,
121
+ "k": 7,
122
+ "ɑ": 8,
123
+ "m": 9,
124
+ "p": 10,
125
+ "": 11,
126
+ "n": 12,
127
+ "d": 13,
128
+ "z": 14,
129
+ "θ": 15,
130
+ "ɪ": 16,
131
+ "ŋ": 17,
132
+ "l": 18,
133
+ "": 19,
134
+ "s": 20,
135
+ "ɜː": 21,
136
+ "t": 22,
137
+ "w": 23,
138
+ "v": 24,
139
+ "ð": 25,
140
+ "æ": 26,
141
+ "ɔ": 27,
142
+ "ɹ": 28,
143
+ "ʌ": 29,
144
+ "f": 30,
145
+ "ə": 31,
146
+ "b": 32,
147
+ "": 33,
148
+ "": 34,
149
+ "": 35,
150
+ "d̠ʒ": 36,
151
+ "i": 37,
152
+ "": 38,
153
+ "": 39,
154
+ "ʊ": 40,
155
+ "ɡ": 41,
156
+ "t̠ʃ": 42,
157
+ "ɔɪ": 43,
158
+ "ʃ": 44,
159
  "ʒ": 45,
160
+ "r": 46,
161
+ "x": 47,
162
+ "ɬ": 48,
163
+ "ɑ̃": 49,
164
+ "": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  },
166
  "unk_token": "UNK"
167
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"d":4,"":5,"j":6,"w":7,"ɔ":8,"n":9,"t":10,"ə":11,"l":12,"ʊ":13,"k":14,"æ":15,"ð":16,"ʌ":17,"ɪ":18,"s":19,"ɛ":20,"z":21,"":22,"ɹ":23,"f":24,"":25,"ɡ":26,"ɑ":27,"h":28,"p":29,"b":30,"i":31,"t̠ʃ":32,"":33,"θ":34,"ŋ":35,"m":36,"ɔɪ":37,"":38,"":39,"v":40,"ɜː":41,"d̠ʒ":42,"ʃ":43,"":44,"ʒ":45,"ɑ̃":46,"r":47,"":48,"x":49,"ɬ":50,"ç":51,"e":52,"o":53,"ɛː":54,"ɪː":55,"u":56,"q":57,"tɕ":58,"tʰ":59,"ɯ":60,"r̩":61,"əʊ":62,"a":63,"ɒ":64,"eə":65}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"j":4,"ɛ":5,"h":6,"k":7,"ɑ":8,"m":9,"p":10,"":11,"n":12,"d":13,"z":14,"θ":15,"ɪ":16,"ŋ":17,"l":18,"":19,"s":20,"ɜː":21,"t":22,"w":23,"v":24,"ð":25,"æ":26,"ɔ":27,"ɹ":28,"ʌ":29,"f":30,"ə":31,"b":32,"":33,"":34,"":35,"d̠ʒ":36,"i":37,"":38,"":39,"ʊ":40,"ɡ":41,"t̠ʃ":42,"ɔɪ":43,"ʃ":44,"ʒ":45,"r":46,"x":47,"ɬ":48,"ɑ̃":49,"":50}