codebyzeb committed on
Commit
8b573d8
1 Parent(s): 82666c5

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +49 -1
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -106,7 +106,55 @@
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
- "R": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  },
111
  "unk_token": "UNK"
112
  }
 
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
+ "m": 4,
110
+ "ɛ": 5,
111
+ "ɲ": 6,
112
+ "y": 7,
113
+ "n": 8,
114
+ "k": 9,
115
+ "ɑ": 10,
116
+ "r": 11,
117
+ "aː": 12,
118
+ "d": 13,
119
+ "i": 14,
120
+ "o": 15,
121
+ "h": 16,
122
+ "z": 17,
123
+ "v": 18,
124
+ "l": 19,
125
+ "eː": 20,
126
+ "j": 21,
127
+ "ʃ": 22,
128
+ "ɟ": 23,
129
+ "s": 24,
130
+ "oː": 25,
131
+ "p": 26,
132
+ "t": 27,
133
+ "tsː": 28,
134
+ "b": 29,
135
+ "u": 30,
136
+ "ɡ": 31,
137
+ "tː": 32,
138
+ "f": 33,
139
+ "ø": 34,
140
+ "t̠ʃ": 35,
141
+ "uː": 36,
142
+ "iː": 37,
143
+ "ts": 38,
144
+ "ɟː": 39,
145
+ "yː": 40,
146
+ "øː": 41,
147
+ "ʎ": 42,
148
+ "t̠ʃː": 43,
149
+ "c": 44,
150
+ "ɡː": 45,
151
+ "kː": 46,
152
+ "ɑː": 47,
153
+ "dː": 48,
154
+ "pː": 49,
155
+ "ʒ": 50,
156
+ "cː": 51,
157
+ "bː": 52
158
  },
159
  "unk_token": "UNK"
160
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"R":4}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"m":4,"ɛ":5,"ɲ":6,"y":7,"n":8,"k":9,"ɑ":10,"r":11,"aː":12,"d":13,"i":14,"o":15,"h":16,"z":17,"v":18,"l":19,"eː":20,"j":21,"ʃ":22,"ɟ":23,"s":24,"oː":25,"p":26,"t":27,"tsː":28,"b":29,"u":30,"ɡ":31,"tː":32,"f":33,"ø":34,"t̠ʃ":35,"uː":36,"iː":37,"ts":38,"ɟː":39,"yː":40,"øː":41,"ʎ":42,"t̠ʃː":43,"c":44,"ɡː":45,"kː":46,"ɑː":47,"dː":48,"pː":49,"ʒ":50,"cː":51,"bː":52}