codebyzeb committed on
Commit
a34d0ba
1 Parent(s): f7adba0

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +49 -1
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -105,7 +105,55 @@
105
  "UNK": 0,
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
- "UTT_BOUNDARY": 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  "unk_token": "UNK"
111
  }
 
105
  "UNK": 0,
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
+ "UTT_BOUNDARY": 3,
109
+ "aː": 4,
110
+ "ɾ": 5,
111
+ "r": 6,
112
+ "ɪ": 7,
113
+ "ɛ": 8,
114
+ "d": 9,
115
+ "s": 10,
116
+ "j": 11,
117
+ "a": 12,
118
+ "b": 13,
119
+ "iː": 14,
120
+ "k": 15,
121
+ "ʋ": 16,
122
+ "ɛː": 17,
123
+ "θ": 18,
124
+ "i": 19,
125
+ "l": 20,
126
+ "n": 21,
127
+ "uː": 22,
128
+ "ð": 23,
129
+ "ɡ": 24,
130
+ "ɔ": 25,
131
+ "h": 26,
132
+ "aʊ": 27,
133
+ "y": 28,
134
+ "m": 29,
135
+ "f": 30,
136
+ "ɔː": 31,
137
+ "x": 32,
138
+ "ɟ": 33,
139
+ "t": 34,
140
+ "eɪ": 35,
141
+ "oʊ": 36,
142
+ "p": 37,
143
+ "ŋ": 38,
144
+ "ɣ": 39,
145
+ "yː": 40,
146
+ "u": 41,
147
+ "ɪː": 42,
148
+ "œ": 43,
149
+ "aɪ": 44,
150
+ "ç": 45,
151
+ "ə": 46,
152
+ "øy": 47,
153
+ "c": 48,
154
+ "ɲ": 49,
155
+ "œː": 50,
156
+ "ɔɪ": 51
157
  },
158
  "unk_token": "UNK"
159
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"aː":4,"ɾ":5,"r":6,"ɪ":7,"ɛ":8,"d":9,"s":10,"j":11,"a":12,"b":13,"iː":14,"k":15,"ʋ":16,"ɛː":17,"θ":18,"i":19,"l":20,"n":21,"uː":22,"ð":23,"ɡ":24,"ɔ":25,"h":26,"aʊ":27,"y":28,"m":29,"f":30,"ɔː":31,"x":32,"ɟ":33,"t":34,"eɪ":35,"oʊ":36,"p":37,"ŋ":38,"ɣ":39,"yː":40,"u":41,"ɪː":42,"œ":43,"aɪ":44,"ç":45,"ə":46,"øy":47,"c":48,"ɲ":49,"œː":50,"ɔɪ":51}