nrshoudi committed on
Commit
38b53b1
1 Parent(s): eed16a8

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +2 -2
  2. vocab.json +45 -48
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 50,
3
- "<s>": 49
4
  }
 
1
  {
2
+ "</s>": 47,
3
+ "<s>": 46
4
  }
vocab.json CHANGED
@@ -1,51 +1,48 @@
1
  {
2
- "[PAD]": 48,
3
- "[UNK]": 47,
4
- "e": 1,
5
- "g": 2,
6
- "t": 3,
7
  "|": 0,
8
- "ء": 4,
9
- "آ": 5,
10
- "أ": 6,
11
- "ؤ": 7,
12
- "إ": 8,
13
- "ئ": 9,
14
- "ا": 10,
15
- "ب": 11,
16
- "ة": 12,
17
- "ت": 13,
18
- "ث": 14,
19
- "ج": 15,
20
- "ح": 16,
21
- "خ": 17,
22
- "د": 18,
23
- "ذ": 19,
24
- "ر": 20,
25
- "ز": 21,
26
- "س": 22,
27
- "ش": 23,
28
- "ص": 24,
29
- "ض": 25,
30
- "ط": 26,
31
- "ظ": 27,
32
- "ع": 28,
33
- "غ": 29,
34
- "ف": 30,
35
- "ق": 31,
36
- "ك": 32,
37
- "ل": 33,
38
- "م": 34,
39
- "ن": 35,
40
- "ه": 36,
41
- "و": 37,
42
- "ى": 38,
43
- "ي": 39,
44
- "چ": 40,
45
- "ڨ": 41,
46
- "ک": 42,
47
- "ھ": 43,
48
- "ی": 44,
49
- "ﺃ": 45,
50
- "ﻻ": 46
51
  }
 
1
  {
2
+ "[PAD]": 45,
3
+ "[UNK]": 44,
 
 
 
4
  "|": 0,
5
+ "ء": 1,
6
+ "آ": 2,
7
+ "أ": 3,
8
+ "ؤ": 4,
9
+ "إ": 5,
10
+ "ئ": 6,
11
+ "ا": 7,
12
+ "ب": 8,
13
+ "ة": 9,
14
+ "ت": 10,
15
+ "ث": 11,
16
+ "ج": 12,
17
+ "ح": 13,
18
+ "خ": 14,
19
+ "د": 15,
20
+ "ذ": 16,
21
+ "ر": 17,
22
+ "ز": 18,
23
+ "س": 19,
24
+ "ش": 20,
25
+ "ص": 21,
26
+ "ض": 22,
27
+ "ط": 23,
28
+ "ظ": 24,
29
+ "ع": 25,
30
+ "غ": 26,
31
+ "ف": 27,
32
+ "ق": 28,
33
+ "ك": 29,
34
+ "ل": 30,
35
+ "م": 31,
36
+ "ن": 32,
37
+ "ه": 33,
38
+ "و": 34,
39
+ "ى": 35,
40
+ "ي": 36,
41
+ "چ": 37,
42
+ "ڨ": 38,
43
+ "ک": 39,
44
+ "ھ": 40,
45
+ "ی": 41,
46
+ "ﺃ": 42,
47
+ "ﻻ": 43
48
  }