Xenova HF staff committed on
Commit
269f137
1 Parent(s): f91a965

Upload tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +97 -0
tokenizer.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 39,
8
+ "content": "<unk>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ }
15
+ ],
16
+ "normalizer": {
17
+ "type": "Sequence",
18
+ "normalizers": [
19
+ {
20
+ "type": "Lowercase"
21
+ },
22
+ {
23
+ "type": "Replace",
24
+ "pattern": {
25
+ "Regex": "[^\u0627\u0646\u0643\u0639\u0625\u063a\u0630\u0629\u0633\u0631\u0637\u062e\u062a\u062c\u0638\u064a\u062f\u2013\u0635\u062b\u0623\u0649\u0636\u062d\u0647 \u0621\u0622\u0628\u0648\u0645\u0644\u0634\u0642\u0632\u0624\u0641_\u0626]"
26
+ },
27
+ "content": ""
28
+ },
29
+ {
30
+ "type": "Strip",
31
+ "strip_left": true,
32
+ "strip_right": true
33
+ },
34
+ {
35
+ "type": "Replace",
36
+ "pattern": {
37
+ "Regex": "(?=.)|(?<!^)$"
38
+ },
39
+ "content": "\u0627"
40
+ }
41
+ ]
42
+ },
43
+ "pre_tokenizer": {
44
+ "type": "Split",
45
+ "pattern": {
46
+ "Regex": ""
47
+ },
48
+ "behavior": "Isolated",
49
+ "invert": false
50
+ },
51
+ "post_processor": null,
52
+ "decoder": null,
53
+ "model": {
54
+ "vocab": {
55
+ "\u0627": 0,
56
+ "\u0646": 1,
57
+ "\u0643": 2,
58
+ "\u0639": 3,
59
+ "\u0625": 4,
60
+ "\u063a": 5,
61
+ "\u0630": 6,
62
+ "\u0629": 7,
63
+ "\u0633": 8,
64
+ "\u0631": 9,
65
+ "\u0637": 10,
66
+ "\u062e": 11,
67
+ "\u062a": 12,
68
+ "\u062c": 13,
69
+ "\u0638": 14,
70
+ "\u064a": 15,
71
+ "\u062f": 16,
72
+ "\u2013": 17,
73
+ "\u0635": 18,
74
+ "\u062b": 19,
75
+ "\u0623": 20,
76
+ "\u0649": 21,
77
+ "\u0636": 22,
78
+ "\u062d": 23,
79
+ "\u0647": 24,
80
+ " ": 25,
81
+ "\u0621": 26,
82
+ "\u0622": 27,
83
+ "\u0628": 28,
84
+ "\u0648": 29,
85
+ "\u0645": 30,
86
+ "\u0644": 31,
87
+ "\u0634": 32,
88
+ "\u0642": 33,
89
+ "\u0632": 34,
90
+ "\u0624": 35,
91
+ "\u0641": 36,
92
+ "_": 37,
93
+ "\u0626": 38,
94
+ "<unk>": 39
95
+ }
96
+ }
97
+ }