Vectorrent committed on
Commit
7b0d7a6
·
verified ·
1 Parent(s): ae7273a

Upload 3 files

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +30 -0
  2. tokenizer.json +1879 -0
  3. tokenizer_config.json +43 -0
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[BOS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "[EOS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "[UNK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
@@ -0,0 +1,1879 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[PAD]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[BOS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[EOS]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": {
44
+ "type": "NFC"
45
+ },
46
+ "pre_tokenizer": {
47
+ "type": "Sequence",
48
+ "pretokenizers": [
49
+ {
50
+ "type": "ByteLevel",
51
+ "add_prefix_space": false,
52
+ "trim_offsets": true,
53
+ "use_regex": true
54
+ },
55
+ {
56
+ "type": "Punctuation",
57
+ "behavior": "Isolated"
58
+ },
59
+ {
60
+ "type": "Digits",
61
+ "individual_digits": true
62
+ }
63
+ ]
64
+ },
65
+ "post_processor": {
66
+ "type": "ByteLevel",
67
+ "add_prefix_space": true,
68
+ "trim_offsets": true,
69
+ "use_regex": true
70
+ },
71
+ "decoder": {
72
+ "type": "ByteLevel",
73
+ "add_prefix_space": true,
74
+ "trim_offsets": true,
75
+ "use_regex": true
76
+ },
77
+ "model": {
78
+ "type": "BPE",
79
+ "dropout": 0.1,
80
+ "unk_token": "[UNK]",
81
+ "continuing_subword_prefix": null,
82
+ "end_of_word_suffix": null,
83
+ "fuse_unk": true,
84
+ "byte_fallback": true,
85
+ "ignore_merges": false,
86
+ "vocab": {
87
+ "[UNK]": 0,
88
+ "[PAD]": 1,
89
+ "[BOS]": 2,
90
+ "[EOS]": 3,
91
+ "!": 4,
92
+ "\"": 5,
93
+ "#": 6,
94
+ "$": 7,
95
+ "%": 8,
96
+ "&": 9,
97
+ "'": 10,
98
+ "(": 11,
99
+ ")": 12,
100
+ "*": 13,
101
+ "+": 14,
102
+ ",": 15,
103
+ "-": 16,
104
+ ".": 17,
105
+ "/": 18,
106
+ "0": 19,
107
+ "1": 20,
108
+ "2": 21,
109
+ "3": 22,
110
+ "4": 23,
111
+ "5": 24,
112
+ "6": 25,
113
+ "7": 26,
114
+ "8": 27,
115
+ "9": 28,
116
+ ":": 29,
117
+ ";": 30,
118
+ "<": 31,
119
+ "=": 32,
120
+ ">": 33,
121
+ "?": 34,
122
+ "@": 35,
123
+ "A": 36,
124
+ "B": 37,
125
+ "C": 38,
126
+ "D": 39,
127
+ "E": 40,
128
+ "F": 41,
129
+ "G": 42,
130
+ "H": 43,
131
+ "I": 44,
132
+ "J": 45,
133
+ "K": 46,
134
+ "L": 47,
135
+ "M": 48,
136
+ "N": 49,
137
+ "O": 50,
138
+ "P": 51,
139
+ "Q": 52,
140
+ "R": 53,
141
+ "S": 54,
142
+ "T": 55,
143
+ "U": 56,
144
+ "V": 57,
145
+ "W": 58,
146
+ "X": 59,
147
+ "Y": 60,
148
+ "Z": 61,
149
+ "[": 62,
150
+ "\\": 63,
151
+ "]": 64,
152
+ "^": 65,
153
+ "_": 66,
154
+ "`": 67,
155
+ "a": 68,
156
+ "b": 69,
157
+ "c": 70,
158
+ "d": 71,
159
+ "e": 72,
160
+ "f": 73,
161
+ "g": 74,
162
+ "h": 75,
163
+ "i": 76,
164
+ "j": 77,
165
+ "k": 78,
166
+ "l": 79,
167
+ "m": 80,
168
+ "n": 81,
169
+ "o": 82,
170
+ "p": 83,
171
+ "q": 84,
172
+ "r": 85,
173
+ "s": 86,
174
+ "t": 87,
175
+ "u": 88,
176
+ "v": 89,
177
+ "w": 90,
178
+ "x": 91,
179
+ "y": 92,
180
+ "z": 93,
181
+ "{": 94,
182
+ "|": 95,
183
+ "}": 96,
184
+ "~": 97,
185
+ "¡": 98,
186
+ "¢": 99,
187
+ "£": 100,
188
+ "¤": 101,
189
+ "¥": 102,
190
+ "¦": 103,
191
+ "§": 104,
192
+ "¨": 105,
193
+ "©": 106,
194
+ "ª": 107,
195
+ "«": 108,
196
+ "¬": 109,
197
+ "®": 110,
198
+ "¯": 111,
199
+ "°": 112,
200
+ "±": 113,
201
+ "²": 114,
202
+ "³": 115,
203
+ "´": 116,
204
+ "µ": 117,
205
+ "¶": 118,
206
+ "·": 119,
207
+ "¸": 120,
208
+ "¹": 121,
209
+ "º": 122,
210
+ "»": 123,
211
+ "¼": 124,
212
+ "½": 125,
213
+ "¾": 126,
214
+ "¿": 127,
215
+ "À": 128,
216
+ "Á": 129,
217
+ "Â": 130,
218
+ "Ã": 131,
219
+ "Ä": 132,
220
+ "Å": 133,
221
+ "Æ": 134,
222
+ "Ç": 135,
223
+ "È": 136,
224
+ "É": 137,
225
+ "Ê": 138,
226
+ "Ë": 139,
227
+ "Ì": 140,
228
+ "Í": 141,
229
+ "Î": 142,
230
+ "Ï": 143,
231
+ "Ð": 144,
232
+ "Ñ": 145,
233
+ "Ò": 146,
234
+ "Ó": 147,
235
+ "Ô": 148,
236
+ "Õ": 149,
237
+ "Ö": 150,
238
+ "×": 151,
239
+ "Ø": 152,
240
+ "Ù": 153,
241
+ "Ú": 154,
242
+ "Û": 155,
243
+ "Ü": 156,
244
+ "Ý": 157,
245
+ "Þ": 158,
246
+ "ß": 159,
247
+ "à": 160,
248
+ "á": 161,
249
+ "â": 162,
250
+ "ã": 163,
251
+ "ä": 164,
252
+ "å": 165,
253
+ "æ": 166,
254
+ "ç": 167,
255
+ "è": 168,
256
+ "é": 169,
257
+ "ê": 170,
258
+ "ë": 171,
259
+ "ì": 172,
260
+ "í": 173,
261
+ "î": 174,
262
+ "ï": 175,
263
+ "ð": 176,
264
+ "ñ": 177,
265
+ "ò": 178,
266
+ "ó": 179,
267
+ "ô": 180,
268
+ "õ": 181,
269
+ "ö": 182,
270
+ "÷": 183,
271
+ "ø": 184,
272
+ "ù": 185,
273
+ "ú": 186,
274
+ "û": 187,
275
+ "ü": 188,
276
+ "ý": 189,
277
+ "þ": 190,
278
+ "ÿ": 191,
279
+ "Ā": 192,
280
+ "ā": 193,
281
+ "Ă": 194,
282
+ "ă": 195,
283
+ "Ą": 196,
284
+ "ą": 197,
285
+ "Ć": 198,
286
+ "ć": 199,
287
+ "Ĉ": 200,
288
+ "ĉ": 201,
289
+ "Ċ": 202,
290
+ "ċ": 203,
291
+ "Č": 204,
292
+ "č": 205,
293
+ "Ď": 206,
294
+ "ď": 207,
295
+ "Đ": 208,
296
+ "đ": 209,
297
+ "Ē": 210,
298
+ "ē": 211,
299
+ "Ĕ": 212,
300
+ "ĕ": 213,
301
+ "Ė": 214,
302
+ "ė": 215,
303
+ "Ę": 216,
304
+ "ę": 217,
305
+ "Ě": 218,
306
+ "ě": 219,
307
+ "Ĝ": 220,
308
+ "ĝ": 221,
309
+ "Ğ": 222,
310
+ "ğ": 223,
311
+ "Ġ": 224,
312
+ "ġ": 225,
313
+ "Ģ": 226,
314
+ "ģ": 227,
315
+ "Ĥ": 228,
316
+ "ĥ": 229,
317
+ "Ħ": 230,
318
+ "ħ": 231,
319
+ "Ĩ": 232,
320
+ "ĩ": 233,
321
+ "Ī": 234,
322
+ "ī": 235,
323
+ "Ĭ": 236,
324
+ "ĭ": 237,
325
+ "Į": 238,
326
+ "į": 239,
327
+ "İ": 240,
328
+ "ı": 241,
329
+ "Ĳ": 242,
330
+ "ĳ": 243,
331
+ "Ĵ": 244,
332
+ "ĵ": 245,
333
+ "Ķ": 246,
334
+ "ķ": 247,
335
+ "ĸ": 248,
336
+ "Ĺ": 249,
337
+ "ĺ": 250,
338
+ "Ļ": 251,
339
+ "ļ": 252,
340
+ "Ľ": 253,
341
+ "ľ": 254,
342
+ "Ŀ": 255,
343
+ "ŀ": 256,
344
+ "Ł": 257,
345
+ "ł": 258,
346
+ "Ń": 259,
347
+ "Ġt": 260,
348
+ "Ġa": 261,
349
+ "he": 262,
350
+ "in": 263,
351
+ "re": 264,
352
+ "on": 265,
353
+ "er": 266,
354
+ "Ġo": 267,
355
+ "at": 268,
356
+ "Ġs": 269,
357
+ "en": 270,
358
+ "Ġc": 271,
359
+ "Ġw": 272,
360
+ "es": 273,
361
+ "is": 274,
362
+ "nd": 275,
363
+ "it": 276,
364
+ "or": 277,
365
+ "Ġp": 278,
366
+ "al": 279,
367
+ "Ġb": 280,
368
+ "ed": 281,
369
+ "an": 282,
370
+ "Ġf": 283,
371
+ "ou": 284,
372
+ "ar": 285,
373
+ "Ġm": 286,
374
+ "ic": 287,
375
+ "Ġd": 288,
376
+ "le": 289,
377
+ "ro": 290,
378
+ "as": 291,
379
+ "Ġh": 292,
380
+ "ct": 293,
381
+ "Ġe": 294,
382
+ "il": 295,
383
+ "om": 296,
384
+ "ve": 297,
385
+ "Ġl": 298,
386
+ "st": 299,
387
+ "Ġn": 300,
388
+ "ly": 301,
389
+ "âĢ": 302,
390
+ "et": 303,
391
+ "ĠT": 304,
392
+ "ol": 305,
393
+ "se": 306,
394
+ "Ġg": 307,
395
+ "im": 308,
396
+ "id": 309,
397
+ "ot": 310,
398
+ "ut": 311,
399
+ "ow": 312,
400
+ "ce": 313,
401
+ "ur": 314,
402
+ "ra": 315,
403
+ "ch": 316,
404
+ "ĠA": 317,
405
+ "ig": 318,
406
+ "ĠS": 319,
407
+ "Ġu": 320,
408
+ "ĠC": 321,
409
+ "ir": 322,
410
+ "ĠI": 323,
411
+ "Ġy": 324,
412
+ "ts": 325,
413
+ "el": 326,
414
+ "ul": 327,
415
+ "ay": 328,
416
+ "am": 329,
417
+ "ad": 330,
418
+ "if": 331,
419
+ "od": 332,
420
+ "ĠM": 333,
421
+ "op": 334,
422
+ "iv": 335,
423
+ "ge": 336,
424
+ "th": 337,
425
+ "ec": 338,
426
+ "ĠP": 339,
427
+ "Ġv": 340,
428
+ "um": 341,
429
+ "ab": 342,
430
+ "ĠB": 343,
431
+ "em": 344,
432
+ "us": 345,
433
+ "ti": 346,
434
+ "ac": 347,
435
+ "ĠH": 348,
436
+ "os": 349,
437
+ "ld": 350,
438
+ "ke": 351,
439
+ "nt": 352,
440
+ "qu": 353,
441
+ "ĠW": 354,
442
+ "ri": 355,
443
+ "pp": 356,
444
+ "ĠD": 357,
445
+ "ud": 358,
446
+ "ĠE": 359,
447
+ "oc": 360,
448
+ "ĠR": 361,
449
+ "Ġr": 362,
450
+ "ĠF": 363,
451
+ "si": 364,
452
+ "un": 365,
453
+ "ff": 366,
454
+ "ll": 367,
455
+ "og": 368,
456
+ "ĠG": 369,
457
+ "ĠN": 370,
458
+ "ls": 371,
459
+ "ew": 372,
460
+ "ht": 373,
461
+ "ho": 374,
462
+ "ĠL": 375,
463
+ "te": 376,
464
+ "ta": 377,
465
+ "ss": 378,
466
+ "la": 379,
467
+ "du": 380,
468
+ "ap": 381,
469
+ "ev": 382,
470
+ "ep": 383,
471
+ "ĠO": 384,
472
+ "ĠJ": 385,
473
+ "tr": 386,
474
+ "ub": 387,
475
+ "Ġk": 388,
476
+ "xp": 389,
477
+ "ag": 390,
478
+ "ak": 391,
479
+ "ĠU": 392,
480
+ "iz": 393,
481
+ "gh": 394,
482
+ "ft": 395,
483
+ "hi": 396,
484
+ "cc": 397,
485
+ "ov": 398,
486
+ "ef": 399,
487
+ "ci": 400,
488
+ "oo": 401,
489
+ "ds": 402,
490
+ "li": 403,
491
+ "pe": 404,
492
+ "to": 405,
493
+ "ks": 406,
494
+ "fe": 407,
495
+ "lo": 408,
496
+ "av": 409,
497
+ "ia": 410,
498
+ "cl": 411,
499
+ "eg": 412,
500
+ "au": 413,
501
+ "gs": 414,
502
+ "ry": 415,
503
+ "ru": 416,
504
+ "Ġj": 417,
505
+ "hr": 418,
506
+ "ue": 419,
507
+ "ĠK": 420,
508
+ "pt": 421,
509
+ "hy": 422,
510
+ "xt": 423,
511
+ "ee": 424,
512
+ "pl": 425,
513
+ "tt": 426,
514
+ "pa": 427,
515
+ "rs": 428,
516
+ "ms": 429,
517
+ "ne": 430,
518
+ "ps": 431,
519
+ "ĠV": 432,
520
+ "ny": 433,
521
+ "ea": 434,
522
+ "we": 435,
523
+ "sp": 436,
524
+ "je": 437,
525
+ "ng": 438,
526
+ "ĠY": 439,
527
+ "ib": 440,
528
+ "wo": 441,
529
+ "ty": 442,
530
+ "ns": 443,
531
+ "tu": 444,
532
+ "dd": 445,
533
+ "ph": 446,
534
+ "ma": 447,
535
+ "su": 448,
536
+ "uc": 449,
537
+ "rt": 450,
538
+ "ob": 451,
539
+ "wn": 452,
540
+ "lp": 453,
541
+ "ug": 454,
542
+ "eb": 455,
543
+ "cy": 456,
544
+ "ip": 457,
545
+ "ha": 458,
546
+ "rg": 459,
547
+ "In": 460,
548
+ "ie": 461,
549
+ "aw": 462,
550
+ "ga": 463,
551
+ "yp": 464,
552
+ "ym": 465,
553
+ "nc": 466,
554
+ "up": 467,
555
+ "lu": 468,
556
+ "vi": 469,
557
+ "bs": 470,
558
+ "de": 471,
559
+ "ys": 472,
560
+ "ck": 473,
561
+ "Th": 474,
562
+ "mo": 475,
563
+ "mb": 476,
564
+ "dv": 477,
565
+ "gu": 478,
566
+ "mp": 479,
567
+ "di": 480,
568
+ "pr": 481,
569
+ "ni": 482,
570
+ "gy": 483,
571
+ "nm": 484,
572
+ "co": 485,
573
+ "fa": 486,
574
+ "ex": 487,
575
+ "sc": 488,
576
+ "eh": 489,
577
+ "me": 490,
578
+ "rd": 491,
579
+ "rn": 492,
580
+ "gr": 493,
581
+ "ey": 494,
582
+ "gi": 495,
583
+ "sa": 496,
584
+ "tm": 497,
585
+ "lf": 498,
586
+ "ox": 499,
587
+ "rc": 500,
588
+ "ix": 501,
589
+ "po": 502,
590
+ "aj": 503,
591
+ "va": 504,
592
+ "be": 505,
593
+ "bo": 506,
594
+ "wa": 507,
595
+ "lt": 508,
596
+ "oy": 509,
597
+ "ax": 510,
598
+ "io": 511,
599
+ "fr": 512,
600
+ "sh": 513,
601
+ "sk": 514,
602
+ "sf": 515,
603
+ "rm": 516,
604
+ "It": 517,
605
+ "Wh": 518,
606
+ "oh": 519,
607
+ "fl": 520,
608
+ "nn": 521,
609
+ "ah": 522,
610
+ "ye": 523,
611
+ "fo": 524,
612
+ "sy": 525,
613
+ "br": 526,
614
+ "dm": 527,
615
+ "sm": 528,
616
+ "go": 529,
617
+ "ĠZ": 530,
618
+ "af": 531,
619
+ "ik": 532,
620
+ "ĠQ": 533,
621
+ "ww": 534,
622
+ "vo": 535,
623
+ "lv": 536,
624
+ "az": 537,
625
+ "tl": 538,
626
+ "lw": 539,
627
+ "na": 540,
628
+ "We": 541,
629
+ "of": 542,
630
+ "If": 543,
631
+ "bi": 544,
632
+ "mm": 545,
633
+ "yn": 546,
634
+ "yd": 547,
635
+ "bl": 548,
636
+ "bu": 549,
637
+ "As": 550,
638
+ "by": 551,
639
+ "sl": 552,
640
+ "xc": 553,
641
+ "ca": 554,
642
+ "SA": 555,
643
+ "NA": 556,
644
+ "nf": 557,
645
+ "Ġz": 558,
646
+ "py": 559,
647
+ "eo": 560,
648
+ "ba": 561,
649
+ "hm": 562,
650
+ "no": 563,
651
+ "tc": 564,
652
+ "ky": 565,
653
+ "dl": 566,
654
+ "ER": 567,
655
+ "do": 568,
656
+ "gl": 569,
657
+ "ai": 570,
658
+ "He": 571,
659
+ "An": 572,
660
+ "ze": 573,
661
+ "rr": 574,
662
+ "kn": 575,
663
+ "fi": 576,
664
+ "Ġi": 577,
665
+ "ws": 578,
666
+ "hs": 579,
667
+ "To": 580,
668
+ "St": 581,
669
+ "ĠX": 582,
670
+ "ek": 583,
671
+ "tn": 584,
672
+ "eu": 585,
673
+ "gt": 586,
674
+ "ju": 587,
675
+ "gg": 588,
676
+ "rb": 589,
677
+ "On": 590,
678
+ "xa": 591,
679
+ "pi": 592,
680
+ "Ã©": 593,
681
+ "ID": 594,
682
+ "ka": 595,
683
+ "nv": 596,
684
+ "cu": 597,
685
+ "ES": 598,
686
+ "gn": 599,
687
+ "cr": 600,
688
+ "AT": 601,
689
+ "ae": 602,
690
+ "ĠÂ": 603,
691
+ "AR": 604,
692
+ "so": 605,
693
+ "yc": 606,
694
+ "hl": 607,
695
+ "dr": 608,
696
+ "xi": 609,
697
+ "uy": 610,
698
+ "IN": 611,
699
+ "yl": 612,
700
+ "ON": 613,
701
+ "Ch": 614,
702
+ "IS": 615,
703
+ "yr": 616,
704
+ "Mo": 617,
705
+ "OR": 618,
706
+ "nl": 619,
707
+ "Al": 620,
708
+ "II": 621,
709
+ "ok": 622,
710
+ "AN": 623,
711
+ "So": 624,
712
+ "yt": 625,
713
+ "pd": 626,
714
+ "fu": 627,
715
+ "IC": 628,
716
+ "lm": 629,
717
+ "ln": 630,
718
+ "wh": 631,
719
+ "hb": 632,
720
+ "AS": 633,
721
+ "uk": 634,
722
+ "EN": 635,
723
+ "IV": 636,
724
+ "gm": 637,
725
+ "da": 638,
726
+ "AC": 639,
727
+ "Ġx": 640,
728
+ "ED": 641,
729
+ "yg": 642,
730
+ "IT": 643,
731
+ "At": 644,
732
+ "mi": 645,
733
+ "By": 646,
734
+ "hu": 647,
735
+ "sn": 648,
736
+ "PA": 649,
737
+ "Ev": 650,
738
+ "lb": 651,
739
+ "bb": 652,
740
+ "kl": 653,
741
+ "Am": 654,
742
+ "oz": 655,
743
+ "ki": 656,
744
+ "AL": 657,
745
+ "oa": 658,
746
+ "oi": 659,
747
+ "ez": 660,
748
+ "hw": 661,
749
+ "Re": 662,
750
+ "HE": 663,
751
+ "RE": 664,
752
+ "ST": 665,
753
+ "gd": 666,
754
+ "oe": 667,
755
+ "DA": 668,
756
+ "Ġâ": 669,
757
+ "rw": 670,
758
+ "Cl": 671,
759
+ "Le": 672,
760
+ "ya": 673,
761
+ "EC": 674,
762
+ "sw": 675,
763
+ "SS": 676,
764
+ "AM": 677,
765
+ "Â°": 678,
766
+ "uz": 679,
767
+ "à¤": 680,
768
+ "yz": 681,
769
+ "yb": 682,
770
+ "ET": 683,
771
+ "np": 684,
772
+ "dw": 685,
773
+ "EM": 686,
774
+ "dy": 687,
775
+ "my": 688,
776
+ "SB": 689,
777
+ "za": 690,
778
+ "PS": 691,
779
+ "lg": 692,
780
+ "No": 693,
781
+ "Do": 694,
782
+ "CO": 695,
783
+ "Is": 696,
784
+ "dn": 697,
785
+ "bn": 698,
786
+ "RO": 699,
787
+ "ux": 700,
788
+ "Äģ": 701,
789
+ "bt": 702,
790
+ "AD": 703,
791
+ "ĠÃ": 704,
792
+ "CC": 705,
793
+ "nw": 706,
794
+ "Go": 707,
795
+ "AP": 708,
796
+ "EL": 709,
797
+ "OT": 710,
798
+ "dh": 711,
799
+ "ml": 712,
800
+ "Ph": 713,
801
+ "ND": 714,
802
+ "OS": 715,
803
+ "SD": 716,
804
+ "Sh": 717,
805
+ "RI": 718,
806
+ "BC": 719,
807
+ "Sp": 720,
808
+ "Un": 721,
809
+ "sb": 722,
810
+ "ÃŃ": 723,
811
+ "ii": 724,
812
+ "kw": 725,
813
+ "De": 726,
814
+ "DF": 727,
815
+ "OV": 728,
816
+ "ei": 729,
817
+ "US": 730,
818
+ "Dr": 731,
819
+ "MS": 732,
820
+ "Ot": 733,
821
+ "Ab": 734,
822
+ "DC": 735,
823
+ "ĠÎ": 736,
824
+ "HO": 737,
825
+ "Po": 738,
826
+ "mf": 739,
827
+ "Ex": 740,
828
+ "CT": 741,
829
+ "wr": 742,
830
+ "Pl": 743,
831
+ "Te": 744,
832
+ "CE": 745,
833
+ "IL": 746,
834
+ "Ne": 747,
835
+ "bd": 748,
836
+ "hd": 749,
837
+ "My": 750,
838
+ "hn": 751,
839
+ "Se": 752,
840
+ "Su": 753,
841
+ "UR": 754,
842
+ "Ma": 755,
843
+ "jo": 756,
844
+ "LE": 757,
845
+ "df": 758,
846
+ "yw": 759,
847
+ "xf": 760,
848
+ "Us": 761,
849
+ "CS": 762,
850
+ "Qu": 763,
851
+ "CR": 764,
852
+ "tz": 765,
853
+ "Pe": 766,
854
+ "Co": 767,
855
+ "kp": 768,
856
+ "xh": 769,
857
+ "IP": 770,
858
+ "OM": 771,
859
+ "EP": 772,
860
+ "LA": 773,
861
+ "tw": 774,
862
+ "Ju": 775,
863
+ "km": 776,
864
+ "Pr": 777,
865
+ "Ov": 778,
866
+ "Be": 779,
867
+ "Pa": 780,
868
+ "OL": 781,
869
+ "TI": 782,
870
+ "Di": 783,
871
+ "DP": 784,
872
+ "ao": 785,
873
+ "Ar": 786,
874
+ "ML": 787,
875
+ "TS": 788,
876
+ "IA": 789,
877
+ "BS": 790,
878
+ "CA": 791,
879
+ "IR": 792,
880
+ "IG": 793,
881
+ "Sc": 794,
882
+ "Bl": 795,
883
+ "pm": 796,
884
+ "wd": 797,
885
+ "Ha": 798,
886
+ "SP": 799,
887
+ "mn": 800,
888
+ "ij": 801,
889
+ "IM": 802,
890
+ "ua": 803,
891
+ "kt": 804,
892
+ "OP": 805,
893
+ "Li": 806,
894
+ "lc": 807,
895
+ "tp": 808,
896
+ "Me": 809,
897
+ "gf": 810,
898
+ "Or": 811,
899
+ "ĠÐ": 812,
900
+ "CH": 813,
901
+ "dj": 814,
902
+ "kf": 815,
903
+ "kh": 816,
904
+ "UN": 817,
905
+ "RA": 818,
906
+ "sd": 819,
907
+ "OC": 820,
908
+ "tf": 821,
909
+ "Br": 822,
910
+ "Tr": 823,
911
+ "NS": 824,
912
+ "AA": 825,
913
+ "Ã¢": 826,
914
+ "kb": 827,
915
+ "wl": 828,
916
+ "Of": 829,
917
+ "Fi": 830,
918
+ "lz": 831,
919
+ "nb": 832,
920
+ "HD": 833,
921
+ "UT": 834,
922
+ "PR": 835,
923
+ "CD": 836,
924
+ "mu": 837,
925
+ "EE": 838,
926
+ "uh": 839,
927
+ "lk": 840,
928
+ "CP": 841,
929
+ "Vi": 842,
930
+ "fs": 843,
931
+ "MA": 844,
932
+ "cC": 845,
933
+ "HA": 846,
934
+ "TP": 847,
935
+ "Ã¤": 848,
936
+ "ko": 849,
937
+ "Fo": 850,
938
+ "Ed": 851,
939
+ "OD": 852,
940
+ "ji": 853,
941
+ "SC": 854,
942
+ "zy": 855,
943
+ "pw": 856,
944
+ "AB": 857,
945
+ "EF": 858,
946
+ "OW": 859,
947
+ "HS": 860,
948
+ "pb": 861,
949
+ "La": 862,
950
+ "zi": 863,
951
+ "zo": 864,
952
+ "MP": 865,
953
+ "nh": 866,
954
+ "RC": 867,
955
+ "hf": 868,
956
+ "DS": 869,
957
+ "Ea": 870,
958
+ "Fr": 871,
959
+ "CL": 872,
960
+ "BT": 873,
961
+ "Ad": 874,
962
+ "uv": 875,
963
+ "SO": 876,
964
+ "IF": 877,
965
+ "EG": 878,
966
+ "Ð°": 879,
967
+ "Ùİ": 880,
968
+ "Â®": 881,
969
+ "rk": 882,
970
+ "TH": 883,
971
+ "MC": 884,
972
+ "Fa": 885,
973
+ "Sa": 886,
974
+ "pg": 887,
975
+ "Ã±": 888,
976
+ "UL": 889,
977
+ "MD": 890,
978
+ "ja": 891,
979
+ "fn": 892,
980
+ "EA": 893,
981
+ "Ðµ": 894,
982
+ "Ca": 895,
983
+ "SL": 896,
984
+ "GE": 897,
985
+ "MI": 898,
986
+ "PL": 899,
987
+ "TA": 900,
988
+ "Wr": 901,
989
+ "Sm": 902,
990
+ "Op": 903,
991
+ "Ġ×": 904,
992
+ "SE": 905,
993
+ "à¥": 906,
994
+ "sg": 907,
995
+ "PI": 908,
996
+ "xu": 909,
997
+ "Ta": 910,
998
+ "Gr": 911,
999
+ "zl": 912,
1000
+ "fy": 913,
1001
+ "ĪĴ": 914,
1002
+ "EO": 915,
1003
+ "AF": 916,
1004
+ "DL": 917,
1005
+ "ĠØ": 918,
1006
+ "Ge": 919,
1007
+ "Ac": 920,
1008
+ "hD": 921,
1009
+ "Ke": 922,
1010
+ "Ã¨": 923,
1011
+ "NC": 924,
1012
+ "Ĥ¬": 925,
1013
+ "nu": 926,
1014
+ "DR": 927,
1015
+ "PP": 928,
1016
+ "db": 929,
1017
+ "LS": 930,
1018
+ "CB": 931,
1019
+ "Ð¸": 932,
1020
+ "TR": 933,
1021
+ "td": 934,
1022
+ "ãģ": 935,
1023
+ "QL": 936,
1024
+ "Ĩĳ": 937,
1025
+ "ih": 938,
1026
+ "wb": 939,
1027
+ "Ps": 940,
1028
+ "GS": 941,
1029
+ "nk": 942,
1030
+ "PV": 943,
1031
+ "ui": 944,
1032
+ "NT": 945,
1033
+ "RS": 946,
1034
+ "CI": 947,
1035
+ "En": 948,
1036
+ "Fe": 949,
1037
+ "Os": 950,
1038
+ "BI": 951,
1039
+ "Im": 952,
1040
+ "gb": 953,
1041
+ "Î±": 954,
1042
+ "BA": 955,
1043
+ "SI": 956,
1044
+ "Gu": 957,
1045
+ "UC": 958,
1046
+ "Hz": 959,
1047
+ "iy": 960,
1048
+ "SM": 961,
1049
+ "Bu": 962,
1050
+ "HT": 963,
1051
+ "Jo": 964,
1052
+ "Fl": 965,
1053
+ "LL": 966,
1054
+ "BM": 967,
1055
+ "Kn": 968,
1056
+ "Eu": 969,
1057
+ "TE": 970,
1058
+ "iu": 971,
1059
+ "Lo": 972,
1060
+ "PU": 973,
1061
+ "NE": 974,
1062
+ "TC": 975,
1063
+ "uf": 976,
1064
+ "wi": 977,
1065
+ "SF": 978,
1066
+ "tg": 979,
1067
+ "Â©": 980,
1068
+ "ÙĦ": 981,
1069
+ "Bi": 982,
1070
+ "UM": 983,
1071
+ "Ob": 984,
1072
+ "AV": 985,
1073
+ "Ra": 986,
1074
+ "CM": 987,
1075
+ "vy": 988,
1076
+ "RL": 989,
1077
+ "BD": 990,
1078
+ "ÑĤ": 991,
1079
+ "RT": 992,
1080
+ "Cr": 993,
1081
+ "PD": 994,
1082
+ "EV": 995,
1083
+ "cm": 996,
1084
+ "xy": 997,
1085
+ "dg": 998,
1086
+ "âĦ": 999,
1087
+ "rh": 1000,
1088
+ "PT": 1001,
1089
+ "ÑĢ": 1002,
1090
+ "PC": 1003,
1091
+ "LC": 1004,
1092
+ "OU": 1005,
1093
+ "pc": 1006,
1094
+ "nj": 1007,
1095
+ "ĠÏ": 1008,
1096
+ "wf": 1009,
1097
+ "Mu": 1010,
1098
+ "FA": 1011,
1099
+ "Ñģ": 1012,
1100
+ "ME": 1013,
1101
+ "rp": 1014,
1102
+ "VD": 1015,
1103
+ "PM": 1016,
1104
+ "FC": 1017,
1105
+ "Åį": 1018,
1106
+ "tb": 1019,
1107
+ "sv": 1020,
1108
+ "kg": 1021,
1109
+ "WS": 1022,
1110
+ "HC": 1023
1111
+ },
1112
+ "merges": [
1113
+ "Ġ t",
1114
+ "Ġ a",
1115
+ "h e",
1116
+ "i n",
1117
+ "r e",
1118
+ "o n",
1119
+ "e r",
1120
+ "Ġ o",
1121
+ "a t",
1122
+ "Ġ s",
1123
+ "e n",
1124
+ "Ġ c",
1125
+ "Ġ w",
1126
+ "e s",
1127
+ "i s",
1128
+ "n d",
1129
+ "i t",
1130
+ "o r",
1131
+ "Ġ p",
1132
+ "a l",
1133
+ "Ġ b",
1134
+ "e d",
1135
+ "a n",
1136
+ "Ġ f",
1137
+ "o u",
1138
+ "a r",
1139
+ "Ġ m",
1140
+ "i c",
1141
+ "Ġ d",
1142
+ "l e",
1143
+ "r o",
1144
+ "a s",
1145
+ "Ġ h",
1146
+ "c t",
1147
+ "Ġ e",
1148
+ "i l",
1149
+ "o m",
1150
+ "v e",
1151
+ "Ġ l",
1152
+ "s t",
1153
+ "Ġ n",
1154
+ "l y",
1155
+ "â Ģ",
1156
+ "e t",
1157
+ "Ġ T",
1158
+ "o l",
1159
+ "s e",
1160
+ "Ġ g",
1161
+ "i m",
1162
+ "i d",
1163
+ "o t",
1164
+ "u t",
1165
+ "o w",
1166
+ "c e",
1167
+ "u r",
1168
+ "r a",
1169
+ "c h",
1170
+ "Ġ A",
1171
+ "i g",
1172
+ "Ġ S",
1173
+ "Ġ u",
1174
+ "Ġ C",
1175
+ "i r",
1176
+ "Ġ I",
1177
+ "Ġ y",
1178
+ "t s",
1179
+ "e l",
1180
+ "u l",
1181
+ "a y",
1182
+ "a m",
1183
+ "a d",
1184
+ "i f",
1185
+ "o d",
1186
+ "Ġ M",
1187
+ "o p",
1188
+ "i v",
1189
+ "g e",
1190
+ "t h",
1191
+ "e c",
1192
+ "Ġ P",
1193
+ "Ġ v",
1194
+ "u m",
1195
+ "a b",
1196
+ "Ġ B",
1197
+ "e m",
1198
+ "u s",
1199
+ "t i",
1200
+ "a c",
1201
+ "Ġ H",
1202
+ "o s",
1203
+ "l d",
1204
+ "k e",
1205
+ "n t",
1206
+ "q u",
1207
+ "Ġ W",
1208
+ "r i",
1209
+ "p p",
1210
+ "Ġ D",
1211
+ "u d",
1212
+ "Ġ E",
1213
+ "o c",
1214
+ "Ġ R",
1215
+ "Ġ r",
1216
+ "Ġ F",
1217
+ "s i",
1218
+ "u n",
1219
+ "f f",
1220
+ "l l",
1221
+ "o g",
1222
+ "Ġ G",
1223
+ "Ġ N",
1224
+ "l s",
1225
+ "e w",
1226
+ "h t",
1227
+ "h o",
1228
+ "Ġ L",
1229
+ "t e",
1230
+ "t a",
1231
+ "s s",
1232
+ "l a",
1233
+ "d u",
1234
+ "a p",
1235
+ "e v",
1236
+ "e p",
1237
+ "Ġ O",
1238
+ "Ġ J",
1239
+ "t r",
1240
+ "u b",
1241
+ "Ġ k",
1242
+ "x p",
1243
+ "a g",
1244
+ "a k",
1245
+ "Ġ U",
1246
+ "i z",
1247
+ "g h",
1248
+ "f t",
1249
+ "h i",
1250
+ "c c",
1251
+ "o v",
1252
+ "e f",
1253
+ "c i",
1254
+ "o o",
1255
+ "d s",
1256
+ "l i",
1257
+ "p e",
1258
+ "t o",
1259
+ "k s",
1260
+ "f e",
1261
+ "l o",
1262
+ "a v",
1263
+ "i a",
1264
+ "c l",
1265
+ "e g",
1266
+ "a u",
1267
+ "g s",
1268
+ "r y",
1269
+ "r u",
1270
+ "Ġ j",
1271
+ "h r",
1272
+ "u e",
1273
+ "Ġ K",
1274
+ "p t",
1275
+ "h y",
1276
+ "x t",
1277
+ "e e",
1278
+ "p l",
1279
+ "t t",
1280
+ "p a",
1281
+ "r s",
1282
+ "m s",
1283
+ "n e",
1284
+ "p s",
1285
+ "Ġ V",
1286
+ "n y",
1287
+ "e a",
1288
+ "w e",
1289
+ "s p",
1290
+ "j e",
1291
+ "n g",
1292
+ "Ġ Y",
1293
+ "i b",
1294
+ "w o",
1295
+ "t y",
1296
+ "n s",
1297
+ "t u",
1298
+ "d d",
1299
+ "p h",
1300
+ "m a",
1301
+ "s u",
1302
+ "u c",
1303
+ "r t",
1304
+ "o b",
1305
+ "w n",
1306
+ "l p",
1307
+ "u g",
1308
+ "e b",
1309
+ "c y",
1310
+ "i p",
1311
+ "h a",
1312
+ "r g",
1313
+ "I n",
1314
+ "i e",
1315
+ "a w",
1316
+ "g a",
1317
+ "y p",
1318
+ "y m",
1319
+ "n c",
1320
+ "u p",
1321
+ "l u",
1322
+ "v i",
1323
+ "b s",
1324
+ "d e",
1325
+ "y s",
1326
+ "c k",
1327
+ "T h",
1328
+ "m o",
1329
+ "m b",
1330
+ "d v",
1331
+ "g u",
1332
+ "m p",
1333
+ "d i",
1334
+ "p r",
1335
+ "n i",
1336
+ "g y",
1337
+ "n m",
1338
+ "c o",
1339
+ "f a",
1340
+ "e x",
1341
+ "s c",
1342
+ "e h",
1343
+ "m e",
1344
+ "r d",
1345
+ "r n",
1346
+ "g r",
1347
+ "e y",
1348
+ "g i",
1349
+ "s a",
1350
+ "t m",
1351
+ "l f",
1352
+ "o x",
1353
+ "r c",
1354
+ "i x",
1355
+ "p o",
1356
+ "a j",
1357
+ "v a",
1358
+ "b e",
1359
+ "b o",
1360
+ "w a",
1361
+ "l t",
1362
+ "o y",
1363
+ "a x",
1364
+ "i o",
1365
+ "f r",
1366
+ "s h",
1367
+ "s k",
1368
+ "s f",
1369
+ "r m",
1370
+ "I t",
1371
+ "W h",
1372
+ "o h",
1373
+ "f l",
1374
+ "n n",
1375
+ "a h",
1376
+ "y e",
1377
+ "f o",
1378
+ "s y",
1379
+ "b r",
1380
+ "d m",
1381
+ "s m",
1382
+ "g o",
1383
+ "Ġ Z",
1384
+ "a f",
1385
+ "i k",
1386
+ "Ġ Q",
1387
+ "w w",
1388
+ "v o",
1389
+ "l v",
1390
+ "a z",
1391
+ "t l",
1392
+ "l w",
1393
+ "n a",
1394
+ "W e",
1395
+ "o f",
1396
+ "I f",
1397
+ "b i",
1398
+ "m m",
1399
+ "y n",
1400
+ "y d",
1401
+ "b l",
1402
+ "b u",
1403
+ "A s",
1404
+ "b y",
1405
+ "s l",
1406
+ "x c",
1407
+ "c a",
1408
+ "S A",
1409
+ "N A",
1410
+ "n f",
1411
+ "Ġ z",
1412
+ "p y",
1413
+ "e o",
1414
+ "b a",
1415
+ "h m",
1416
+ "n o",
1417
+ "t c",
1418
+ "k y",
1419
+ "d l",
1420
+ "E R",
1421
+ "d o",
1422
+ "g l",
1423
+ "a i",
1424
+ "H e",
1425
+ "A n",
1426
+ "z e",
1427
+ "r r",
1428
+ "k n",
1429
+ "f i",
1430
+ "Ġ i",
1431
+ "w s",
1432
+ "h s",
1433
+ "T o",
1434
+ "S t",
1435
+ "Ġ X",
1436
+ "e k",
1437
+ "t n",
1438
+ "e u",
1439
+ "g t",
1440
+ "j u",
1441
+ "g g",
1442
+ "r b",
1443
+ "O n",
1444
+ "x a",
1445
+ "p i",
1446
+ "Ã ©",
1447
+ "I D",
1448
+ "k a",
1449
+ "n v",
1450
+ "c u",
1451
+ "E S",
1452
+ "g n",
1453
+ "c r",
1454
+ "A T",
1455
+ "a e",
1456
+ "Ġ Â",
1457
+ "A R",
1458
+ "s o",
1459
+ "y c",
1460
+ "h l",
1461
+ "d r",
1462
+ "x i",
1463
+ "u y",
1464
+ "I N",
1465
+ "y l",
1466
+ "O N",
1467
+ "C h",
1468
+ "I S",
1469
+ "y r",
1470
+ "M o",
1471
+ "O R",
1472
+ "n l",
1473
+ "A l",
1474
+ "I I",
1475
+ "o k",
1476
+ "A N",
1477
+ "S o",
1478
+ "y t",
1479
+ "p d",
1480
+ "f u",
1481
+ "I C",
1482
+ "l m",
1483
+ "l n",
1484
+ "w h",
1485
+ "h b",
1486
+ "A S",
1487
+ "u k",
1488
+ "E N",
1489
+ "I V",
1490
+ "g m",
1491
+ "d a",
1492
+ "A C",
1493
+ "Ġ x",
1494
+ "E D",
1495
+ "y g",
1496
+ "I T",
1497
+ "A t",
1498
+ "m i",
1499
+ "B y",
1500
+ "h u",
1501
+ "s n",
1502
+ "P A",
1503
+ "E v",
1504
+ "l b",
1505
+ "b b",
1506
+ "k l",
1507
+ "A m",
1508
+ "o z",
1509
+ "k i",
1510
+ "A L",
1511
+ "o a",
1512
+ "o i",
1513
+ "e z",
1514
+ "h w",
1515
+ "R e",
1516
+ "H E",
1517
+ "R E",
1518
+ "S T",
1519
+ "g d",
1520
+ "o e",
1521
+ "D A",
1522
+ "Ġ â",
1523
+ "r w",
1524
+ "C l",
1525
+ "L e",
1526
+ "y a",
1527
+ "E C",
1528
+ "s w",
1529
+ "S S",
1530
+ "A M",
1531
+ "Â °",
1532
+ "u z",
1533
+ "à ¤",
1534
+ "y z",
1535
+ "y b",
1536
+ "E T",
1537
+ "n p",
1538
+ "d w",
1539
+ "E M",
1540
+ "d y",
1541
+ "m y",
1542
+ "S B",
1543
+ "z a",
1544
+ "P S",
1545
+ "l g",
1546
+ "N o",
1547
+ "D o",
1548
+ "C O",
1549
+ "I s",
1550
+ "d n",
1551
+ "b n",
1552
+ "R O",
1553
+ "u x",
1554
+ "Ä ģ",
1555
+ "b t",
1556
+ "A D",
1557
+ "Ġ Ã",
1558
+ "C C",
1559
+ "n w",
1560
+ "G o",
1561
+ "A P",
1562
+ "E L",
1563
+ "O T",
1564
+ "d h",
1565
+ "m l",
1566
+ "P h",
1567
+ "N D",
1568
+ "O S",
1569
+ "S D",
1570
+ "S h",
1571
+ "R I",
1572
+ "B C",
1573
+ "S p",
1574
+ "U n",
1575
+ "s b",
1576
+ "Ã Ń",
1577
+ "i i",
1578
+ "k w",
1579
+ "D e",
1580
+ "D F",
1581
+ "O V",
1582
+ "e i",
1583
+ "U S",
1584
+ "D r",
1585
+ "M S",
1586
+ "O t",
1587
+ "A b",
1588
+ "D C",
1589
+ "Ġ Î",
1590
+ "H O",
1591
+ "P o",
1592
+ "m f",
1593
+ "E x",
1594
+ "C T",
1595
+ "w r",
1596
+ "P l",
1597
+ "T e",
1598
+ "C E",
1599
+ "I L",
1600
+ "N e",
1601
+ "b d",
1602
+ "h d",
1603
+ "M y",
1604
+ "h n",
1605
+ "S e",
1606
+ "S u",
1607
+ "U R",
1608
+ "M a",
1609
+ "j o",
1610
+ "L E",
1611
+ "d f",
1612
+ "y w",
1613
+ "x f",
1614
+ "U s",
1615
+ "C S",
1616
+ "Q u",
1617
+ "C R",
1618
+ "t z",
1619
+ "P e",
1620
+ "C o",
1621
+ "k p",
1622
+ "x h",
1623
+ "I P",
1624
+ "O M",
1625
+ "E P",
1626
+ "L A",
1627
+ "t w",
1628
+ "J u",
1629
+ "k m",
1630
+ "P r",
1631
+ "O v",
1632
+ "B e",
1633
+ "P a",
1634
+ "O L",
1635
+ "T I",
1636
+ "D i",
1637
+ "D P",
1638
+ "a o",
1639
+ "A r",
1640
+ "M L",
1641
+ "T S",
1642
+ "I A",
1643
+ "B S",
1644
+ "C A",
1645
+ "I R",
1646
+ "I G",
1647
+ "S c",
1648
+ "B l",
1649
+ "p m",
1650
+ "w d",
1651
+ "H a",
1652
+ "S P",
1653
+ "m n",
1654
+ "i j",
1655
+ "I M",
1656
+ "u a",
1657
+ "k t",
1658
+ "O P",
1659
+ "L i",
1660
+ "l c",
1661
+ "t p",
1662
+ "M e",
1663
+ "g f",
1664
+ "O r",
1665
+ "Ġ Ð",
1666
+ "C H",
1667
+ "d j",
1668
+ "k f",
1669
+ "k h",
1670
+ "U N",
1671
+ "R A",
1672
+ "s d",
1673
+ "O C",
1674
+ "t f",
1675
+ "B r",
1676
+ "T r",
1677
+ "N S",
1678
+ "A A",
1679
+ "Ã ¢",
1680
+ "k b",
1681
+ "w l",
1682
+ "O f",
1683
+ "F i",
1684
+ "l z",
1685
+ "n b",
1686
+ "H D",
1687
+ "U T",
1688
+ "P R",
1689
+ "C D",
1690
+ "m u",
1691
+ "E E",
1692
+ "u h",
1693
+ "l k",
1694
+ "C P",
1695
+ "V i",
1696
+ "f s",
1697
+ "M A",
1698
+ "c C",
1699
+ "H A",
1700
+ "T P",
1701
+ "Ã ¤",
1702
+ "k o",
1703
+ "F o",
1704
+ "E d",
1705
+ "O D",
1706
+ "j i",
1707
+ "S C",
1708
+ "z y",
1709
+ "p w",
1710
+ "A B",
1711
+ "E F",
1712
+ "O W",
1713
+ "H S",
1714
+ "p b",
1715
+ "L a",
1716
+ "z i",
1717
+ "z o",
1718
+ "M P",
1719
+ "n h",
1720
+ "R C",
1721
+ "h f",
1722
+ "D S",
1723
+ "E a",
1724
+ "F r",
1725
+ "C L",
1726
+ "B T",
1727
+ "A d",
1728
+ "u v",
1729
+ "S O",
1730
+ "I F",
1731
+ "E G",
1732
+ "Ð °",
1733
+ "Ù İ",
1734
+ "Â ®",
1735
+ "r k",
1736
+ "T H",
1737
+ "M C",
1738
+ "F a",
1739
+ "S a",
1740
+ "p g",
1741
+ "Ã ±",
1742
+ "U L",
1743
+ "M D",
1744
+ "j a",
1745
+ "f n",
1746
+ "E A",
1747
+ "Ð µ",
1748
+ "C a",
1749
+ "S L",
1750
+ "G E",
1751
+ "M I",
1752
+ "P L",
1753
+ "T A",
1754
+ "W r",
1755
+ "S m",
1756
+ "O p",
1757
+ "Ġ ×",
1758
+ "S E",
1759
+ "à ¥",
1760
+ "s g",
1761
+ "P I",
1762
+ "x u",
1763
+ "T a",
1764
+ "G r",
1765
+ "z l",
1766
+ "f y",
1767
+ "Ī Ĵ",
1768
+ "E O",
1769
+ "A F",
1770
+ "D L",
1771
+ "Ġ Ø",
1772
+ "G e",
1773
+ "A c",
1774
+ "h D",
1775
+ "K e",
1776
+ "Ã ¨",
1777
+ "N C",
1778
+ "Ĥ ¬",
1779
+ "n u",
1780
+ "D R",
1781
+ "P P",
1782
+ "d b",
1783
+ "L S",
1784
+ "C B",
1785
+ "Ð ¸",
1786
+ "T R",
1787
+ "t d",
1788
+ "ã ģ",
1789
+ "Q L",
1790
+ "Ĩ ĳ",
1791
+ "i h",
1792
+ "w b",
1793
+ "P s",
1794
+ "G S",
1795
+ "n k",
1796
+ "P V",
1797
+ "u i",
1798
+ "N T",
1799
+ "R S",
1800
+ "C I",
1801
+ "E n",
1802
+ "F e",
1803
+ "O s",
1804
+ "B I",
1805
+ "I m",
1806
+ "g b",
1807
+ "Î ±",
1808
+ "B A",
1809
+ "S I",
1810
+ "G u",
1811
+ "U C",
1812
+ "H z",
1813
+ "i y",
1814
+ "S M",
1815
+ "B u",
1816
+ "H T",
1817
+ "J o",
1818
+ "F l",
1819
+ "L L",
1820
+ "B M",
1821
+ "K n",
1822
+ "E u",
1823
+ "T E",
1824
+ "i u",
1825
+ "L o",
1826
+ "P U",
1827
+ "N E",
1828
+ "T C",
1829
+ "u f",
1830
+ "w i",
1831
+ "S F",
1832
+ "t g",
1833
+ "Â ©",
1834
+ "Ù Ħ",
1835
+ "B i",
1836
+ "U M",
1837
+ "O b",
1838
+ "A V",
1839
+ "R a",
1840
+ "C M",
1841
+ "v y",
1842
+ "R L",
1843
+ "B D",
1844
+ "Ñ Ĥ",
1845
+ "R T",
1846
+ "C r",
1847
+ "P D",
1848
+ "E V",
1849
+ "c m",
1850
+ "x y",
1851
+ "d g",
1852
+ "â Ħ",
1853
+ "r h",
1854
+ "P T",
1855
+ "Ñ Ģ",
1856
+ "P C",
1857
+ "L C",
1858
+ "O U",
1859
+ "p c",
1860
+ "n j",
1861
+ "Ġ Ï",
1862
+ "w f",
1863
+ "M u",
1864
+ "F A",
1865
+ "Ñ ģ",
1866
+ "M E",
1867
+ "r p",
1868
+ "V D",
1869
+ "P M",
1870
+ "F C",
1871
+ "Å į",
1872
+ "t b",
1873
+ "s v",
1874
+ "k g",
1875
+ "W S",
1876
+ "H C"
1877
+ ]
1878
+ }
1879
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[BOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[EOS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "[BOS]",
37
+ "clean_up_tokenization_spaces": true,
38
+ "eos_token": "[EOS]",
39
+ "model_max_length": 1000000000000000019884624838656,
40
+ "pad_token": "[PAD]",
41
+ "tokenizer_class": "PreTrainedTokenizerFast",
42
+ "unk_token": "[UNK]"
43
+ }