Upload tokenizer
Browse files- tokenizer.json +196 -66
- vocab.json +1 -1
tokenizer.json
CHANGED
@@ -191,6 +191,123 @@
|
|
191 |
"rstrip": false,
|
192 |
"normalized": false,
|
193 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
}
|
195 |
],
|
196 |
"normalizer": null,
|
@@ -229,72 +346,85 @@
|
|
229 |
"<|meter_13|>": 18,
|
230 |
"<|meter_14|>": 19,
|
231 |
"<|meter_15|>": 20,
|
232 |
-
"
|
233 |
-
"
|
234 |
-
"
|
235 |
-
"
|
236 |
-
"
|
237 |
-
"
|
238 |
-
"
|
239 |
-
"
|
240 |
-
"
|
241 |
-
"
|
242 |
-
"
|
243 |
-
"
|
244 |
-
"
|
245 |
-
"
|
246 |
-
"
|
247 |
-
"
|
248 |
-
"
|
249 |
-
"
|
250 |
-
"
|
251 |
-
"
|
252 |
-
"
|
253 |
-
"
|
254 |
-
"
|
255 |
-
"
|
256 |
-
"
|
257 |
-
"
|
258 |
-
"
|
259 |
-
"
|
260 |
-
"
|
261 |
-
"
|
262 |
-
"
|
263 |
-
"
|
264 |
-
"
|
265 |
-
"
|
266 |
-
"
|
267 |
-
"
|
268 |
-
"
|
269 |
-
"
|
270 |
-
"
|
271 |
-
"
|
272 |
-
"
|
273 |
-
"
|
274 |
-
"
|
275 |
-
"
|
276 |
-
"
|
277 |
-
"
|
278 |
-
"
|
279 |
-
"
|
280 |
-
"
|
281 |
-
"
|
282 |
-
"
|
283 |
-
"
|
284 |
-
"
|
285 |
-
"
|
286 |
-
"
|
287 |
-
"
|
288 |
-
"
|
289 |
-
"
|
290 |
-
"
|
291 |
-
"
|
292 |
-
"
|
293 |
-
"
|
294 |
-
"
|
295 |
-
"
|
296 |
-
"
|
297 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
},
|
299 |
"merges": []
|
300 |
}
|
|
|
191 |
"rstrip": false,
|
192 |
"normalized": false,
|
193 |
"special": true
|
194 |
+
},
|
195 |
+
{
|
196 |
+
"id": 21,
|
197 |
+
"content": "<|res_0|>",
|
198 |
+
"single_word": false,
|
199 |
+
"lstrip": false,
|
200 |
+
"rstrip": false,
|
201 |
+
"normalized": false,
|
202 |
+
"special": true
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"id": 22,
|
206 |
+
"content": "<|res_1|>",
|
207 |
+
"single_word": false,
|
208 |
+
"lstrip": false,
|
209 |
+
"rstrip": false,
|
210 |
+
"normalized": false,
|
211 |
+
"special": true
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"id": 23,
|
215 |
+
"content": "<|res_2|>",
|
216 |
+
"single_word": false,
|
217 |
+
"lstrip": false,
|
218 |
+
"rstrip": false,
|
219 |
+
"normalized": false,
|
220 |
+
"special": true
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"id": 24,
|
224 |
+
"content": "<|res_3|>",
|
225 |
+
"single_word": false,
|
226 |
+
"lstrip": false,
|
227 |
+
"rstrip": false,
|
228 |
+
"normalized": false,
|
229 |
+
"special": true
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"id": 25,
|
233 |
+
"content": "<|res_4|>",
|
234 |
+
"single_word": false,
|
235 |
+
"lstrip": false,
|
236 |
+
"rstrip": false,
|
237 |
+
"normalized": false,
|
238 |
+
"special": true
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"id": 26,
|
242 |
+
"content": "<|res_5|>",
|
243 |
+
"single_word": false,
|
244 |
+
"lstrip": false,
|
245 |
+
"rstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"special": true
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"id": 27,
|
251 |
+
"content": "<|res_6|>",
|
252 |
+
"single_word": false,
|
253 |
+
"lstrip": false,
|
254 |
+
"rstrip": false,
|
255 |
+
"normalized": false,
|
256 |
+
"special": true
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"id": 28,
|
260 |
+
"content": "<|res_7|>",
|
261 |
+
"single_word": false,
|
262 |
+
"lstrip": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"normalized": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"id": 29,
|
269 |
+
"content": "<|res_8|>",
|
270 |
+
"single_word": false,
|
271 |
+
"lstrip": false,
|
272 |
+
"rstrip": false,
|
273 |
+
"normalized": false,
|
274 |
+
"special": true
|
275 |
+
},
|
276 |
+
{
|
277 |
+
"id": 30,
|
278 |
+
"content": "<|res_9|>",
|
279 |
+
"single_word": false,
|
280 |
+
"lstrip": false,
|
281 |
+
"rstrip": false,
|
282 |
+
"normalized": false,
|
283 |
+
"special": true
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"id": 31,
|
287 |
+
"content": "<|res_10|>",
|
288 |
+
"single_word": false,
|
289 |
+
"lstrip": false,
|
290 |
+
"rstrip": false,
|
291 |
+
"normalized": false,
|
292 |
+
"special": true
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"id": 32,
|
296 |
+
"content": "<|res_11|>",
|
297 |
+
"single_word": false,
|
298 |
+
"lstrip": false,
|
299 |
+
"rstrip": false,
|
300 |
+
"normalized": false,
|
301 |
+
"special": true
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"id": 33,
|
305 |
+
"content": "<|res_12|>",
|
306 |
+
"single_word": false,
|
307 |
+
"lstrip": false,
|
308 |
+
"rstrip": false,
|
309 |
+
"normalized": false,
|
310 |
+
"special": true
|
311 |
}
|
312 |
],
|
313 |
"normalizer": null,
|
|
|
346 |
"<|meter_13|>": 18,
|
347 |
"<|meter_14|>": 19,
|
348 |
"<|meter_15|>": 20,
|
349 |
+
"<|res_0|>": 21,
|
350 |
+
"<|res_1|>": 22,
|
351 |
+
"<|res_2|>": 23,
|
352 |
+
"<|res_3|>": 24,
|
353 |
+
"<|res_4|>": 25,
|
354 |
+
"<|res_5|>": 26,
|
355 |
+
"<|res_6|>": 27,
|
356 |
+
"<|res_7|>": 28,
|
357 |
+
"<|res_8|>": 29,
|
358 |
+
"<|res_9|>": 30,
|
359 |
+
"<|res_10|>": 31,
|
360 |
+
"<|res_11|>": 32,
|
361 |
+
"<|res_12|>": 33,
|
362 |
+
" ": 34,
|
363 |
+
"0": 35,
|
364 |
+
"1": 36,
|
365 |
+
"2": 37,
|
366 |
+
"3": 38,
|
367 |
+
"4": 39,
|
368 |
+
"5": 40,
|
369 |
+
"6": 41,
|
370 |
+
"7": 42,
|
371 |
+
"8": 43,
|
372 |
+
"9": 44,
|
373 |
+
"<": 45,
|
374 |
+
">": 46,
|
375 |
+
"_": 47,
|
376 |
+
"b": 48,
|
377 |
+
"e": 49,
|
378 |
+
"m": 50,
|
379 |
+
"p": 51,
|
380 |
+
"r": 52,
|
381 |
+
"s": 53,
|
382 |
+
"t": 54,
|
383 |
+
"v": 55,
|
384 |
+
"|": 56,
|
385 |
+
"~": 57,
|
386 |
+
"ء": 58,
|
387 |
+
"أ": 59,
|
388 |
+
"ؤ": 60,
|
389 |
+
"ئ": 61,
|
390 |
+
"ا": 62,
|
391 |
+
"ب": 63,
|
392 |
+
"ة": 64,
|
393 |
+
"ت": 65,
|
394 |
+
"ث": 66,
|
395 |
+
"ج": 67,
|
396 |
+
"ح": 68,
|
397 |
+
"خ": 69,
|
398 |
+
"د": 70,
|
399 |
+
"ذ": 71,
|
400 |
+
"ر": 72,
|
401 |
+
"ز": 73,
|
402 |
+
"س": 74,
|
403 |
+
"ش": 75,
|
404 |
+
"ص": 76,
|
405 |
+
"ض": 77,
|
406 |
+
"ط": 78,
|
407 |
+
"ظ": 79,
|
408 |
+
"ع": 80,
|
409 |
+
"غ": 81,
|
410 |
+
"ف": 82,
|
411 |
+
"ق": 83,
|
412 |
+
"ك": 84,
|
413 |
+
"ل": 85,
|
414 |
+
"م": 86,
|
415 |
+
"ن": 87,
|
416 |
+
"ه": 88,
|
417 |
+
"و": 89,
|
418 |
+
"ى": 90,
|
419 |
+
"ي": 91,
|
420 |
+
"ً": 92,
|
421 |
+
"ٌ": 93,
|
422 |
+
"ٍ": 94,
|
423 |
+
"َ": 95,
|
424 |
+
"ُ": 96,
|
425 |
+
"ِ": 97,
|
426 |
+
"ّ": 98,
|
427 |
+
"ْ": 99
|
428 |
},
|
429 |
"merges": []
|
430 |
}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<|endoftext|>":0,"<|vsep|>":1,"<|bsep|>":2,"<|pad|>":3,"<|psep|>":4,"<|meter_0|>":5,"<|meter_1|>":6,"<|meter_2|>":7,"<|meter_3|>":8,"<|meter_4|>":9,"<|meter_5|>":10,"<|meter_6|>":11,"<|meter_7|>":12,"<|meter_8|>":13,"<|meter_9|>":14,"<|meter_10|>":15,"<|meter_11|>":16,"<|meter_12|>":17,"<|meter_13|>":18,"<|meter_14|>":19,"<|meter_15|>":20,"
|
|
|
1 |
+
{"<|endoftext|>":0,"<|vsep|>":1,"<|bsep|>":2,"<|pad|>":3,"<|psep|>":4,"<|meter_0|>":5,"<|meter_1|>":6,"<|meter_2|>":7,"<|meter_3|>":8,"<|meter_4|>":9,"<|meter_5|>":10,"<|meter_6|>":11,"<|meter_7|>":12,"<|meter_8|>":13,"<|meter_9|>":14,"<|meter_10|>":15,"<|meter_11|>":16,"<|meter_12|>":17,"<|meter_13|>":18,"<|meter_14|>":19,"<|meter_15|>":20,"<|res_0|>":21,"<|res_1|>":22,"<|res_2|>":23,"<|res_3|>":24,"<|res_4|>":25,"<|res_5|>":26,"<|res_6|>":27,"<|res_7|>":28,"<|res_8|>":29,"<|res_9|>":30,"<|res_10|>":31,"<|res_11|>":32,"<|res_12|>":33," ":34,"0":35,"1":36,"2":37,"3":38,"4":39,"5":40,"6":41,"7":42,"8":43,"9":44,"<":45,">":46,"_":47,"b":48,"e":49,"m":50,"p":51,"r":52,"s":53,"t":54,"v":55,"|":56,"~":57,"ء":58,"أ":59,"ؤ":60,"ئ":61,"ا":62,"ب":63,"ة":64,"ت":65,"ث":66,"ج":67,"ح":68,"خ":69,"د":70,"ذ":71,"ر":72,"ز":73,"س":74,"ش":75,"ص":76,"ض":77,"ط":78,"ظ":79,"ع":80,"غ":81,"ف":82,"ق":83,"ك":84,"ل":85,"م":86,"ن":87,"ه":88,"و":89,"ى":90,"ي":91,"ً":92,"ٌ":93,"ٍ":94,"َ":95,"ُ":96,"ِ":97,"ّ":98,"ْ":99}
|