mjbuehler commited on
Commit
14ea26c
1 Parent(s): cf5b59d

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer.json +1871 -0
  3. tokenizer_config.json +5 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.json ADDED
@@ -0,0 +1,1871 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<s>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<pad>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "ByteLevel",
55
+ "add_prefix_space": false,
56
+ "trim_offsets": true,
57
+ "use_regex": true
58
+ },
59
+ "post_processor": {
60
+ "type": "ByteLevel",
61
+ "add_prefix_space": true,
62
+ "trim_offsets": false,
63
+ "use_regex": true
64
+ },
65
+ "decoder": {
66
+ "type": "ByteLevel",
67
+ "add_prefix_space": true,
68
+ "trim_offsets": true,
69
+ "use_regex": true
70
+ },
71
+ "model": {
72
+ "type": "BPE",
73
+ "dropout": null,
74
+ "unk_token": null,
75
+ "continuing_subword_prefix": null,
76
+ "end_of_word_suffix": null,
77
+ "fuse_unk": false,
78
+ "byte_fallback": false,
79
+ "vocab": {
80
+ "<s>": 0,
81
+ "<pad>": 1,
82
+ "</s>": 2,
83
+ "<unk>": 3,
84
+ "<mask>": 4,
85
+ "!": 5,
86
+ "\"": 6,
87
+ "#": 7,
88
+ "$": 8,
89
+ "%": 9,
90
+ "&": 10,
91
+ "'": 11,
92
+ "(": 12,
93
+ ")": 13,
94
+ "*": 14,
95
+ "+": 15,
96
+ ",": 16,
97
+ "-": 17,
98
+ ".": 18,
99
+ "/": 19,
100
+ "0": 20,
101
+ "1": 21,
102
+ "2": 22,
103
+ "3": 23,
104
+ "4": 24,
105
+ "5": 25,
106
+ "6": 26,
107
+ "7": 27,
108
+ "8": 28,
109
+ "9": 29,
110
+ ":": 30,
111
+ ";": 31,
112
+ "<": 32,
113
+ "=": 33,
114
+ ">": 34,
115
+ "?": 35,
116
+ "@": 36,
117
+ "A": 37,
118
+ "B": 38,
119
+ "C": 39,
120
+ "D": 40,
121
+ "E": 41,
122
+ "F": 42,
123
+ "G": 43,
124
+ "H": 44,
125
+ "I": 45,
126
+ "J": 46,
127
+ "K": 47,
128
+ "L": 48,
129
+ "M": 49,
130
+ "N": 50,
131
+ "O": 51,
132
+ "P": 52,
133
+ "Q": 53,
134
+ "R": 54,
135
+ "S": 55,
136
+ "T": 56,
137
+ "U": 57,
138
+ "V": 58,
139
+ "W": 59,
140
+ "X": 60,
141
+ "Y": 61,
142
+ "Z": 62,
143
+ "[": 63,
144
+ "\\": 64,
145
+ "]": 65,
146
+ "^": 66,
147
+ "_": 67,
148
+ "`": 68,
149
+ "a": 69,
150
+ "b": 70,
151
+ "c": 71,
152
+ "d": 72,
153
+ "e": 73,
154
+ "f": 74,
155
+ "g": 75,
156
+ "h": 76,
157
+ "i": 77,
158
+ "j": 78,
159
+ "k": 79,
160
+ "l": 80,
161
+ "m": 81,
162
+ "n": 82,
163
+ "o": 83,
164
+ "p": 84,
165
+ "q": 85,
166
+ "r": 86,
167
+ "s": 87,
168
+ "t": 88,
169
+ "u": 89,
170
+ "v": 90,
171
+ "w": 91,
172
+ "x": 92,
173
+ "y": 93,
174
+ "z": 94,
175
+ "{": 95,
176
+ "|": 96,
177
+ "}": 97,
178
+ "~": 98,
179
+ "¡": 99,
180
+ "¢": 100,
181
+ "£": 101,
182
+ "¤": 102,
183
+ "¥": 103,
184
+ "¦": 104,
185
+ "§": 105,
186
+ "¨": 106,
187
+ "©": 107,
188
+ "ª": 108,
189
+ "«": 109,
190
+ "¬": 110,
191
+ "®": 111,
192
+ "¯": 112,
193
+ "°": 113,
194
+ "±": 114,
195
+ "²": 115,
196
+ "³": 116,
197
+ "´": 117,
198
+ "µ": 118,
199
+ "¶": 119,
200
+ "·": 120,
201
+ "¸": 121,
202
+ "¹": 122,
203
+ "º": 123,
204
+ "»": 124,
205
+ "¼": 125,
206
+ "½": 126,
207
+ "¾": 127,
208
+ "¿": 128,
209
+ "À": 129,
210
+ "Á": 130,
211
+ "Â": 131,
212
+ "Ã": 132,
213
+ "Ä": 133,
214
+ "Å": 134,
215
+ "Æ": 135,
216
+ "Ç": 136,
217
+ "È": 137,
218
+ "É": 138,
219
+ "Ê": 139,
220
+ "Ë": 140,
221
+ "Ì": 141,
222
+ "Í": 142,
223
+ "Î": 143,
224
+ "Ï": 144,
225
+ "Ð": 145,
226
+ "Ñ": 146,
227
+ "Ò": 147,
228
+ "Ó": 148,
229
+ "Ô": 149,
230
+ "Õ": 150,
231
+ "Ö": 151,
232
+ "×": 152,
233
+ "Ø": 153,
234
+ "Ù": 154,
235
+ "Ú": 155,
236
+ "Û": 156,
237
+ "Ü": 157,
238
+ "Ý": 158,
239
+ "Þ": 159,
240
+ "ß": 160,
241
+ "à": 161,
242
+ "á": 162,
243
+ "â": 163,
244
+ "ã": 164,
245
+ "ä": 165,
246
+ "å": 166,
247
+ "æ": 167,
248
+ "ç": 168,
249
+ "è": 169,
250
+ "é": 170,
251
+ "ê": 171,
252
+ "ë": 172,
253
+ "ì": 173,
254
+ "í": 174,
255
+ "î": 175,
256
+ "ï": 176,
257
+ "ð": 177,
258
+ "ñ": 178,
259
+ "ò": 179,
260
+ "ó": 180,
261
+ "ô": 181,
262
+ "õ": 182,
263
+ "ö": 183,
264
+ "÷": 184,
265
+ "ø": 185,
266
+ "ù": 186,
267
+ "ú": 187,
268
+ "û": 188,
269
+ "ü": 189,
270
+ "ý": 190,
271
+ "þ": 191,
272
+ "ÿ": 192,
273
+ "Ā": 193,
274
+ "ā": 194,
275
+ "Ă": 195,
276
+ "ă": 196,
277
+ "Ą": 197,
278
+ "ą": 198,
279
+ "Ć": 199,
280
+ "ć": 200,
281
+ "Ĉ": 201,
282
+ "ĉ": 202,
283
+ "Ċ": 203,
284
+ "ċ": 204,
285
+ "Č": 205,
286
+ "č": 206,
287
+ "Ď": 207,
288
+ "ď": 208,
289
+ "Đ": 209,
290
+ "đ": 210,
291
+ "Ē": 211,
292
+ "ē": 212,
293
+ "Ĕ": 213,
294
+ "ĕ": 214,
295
+ "Ė": 215,
296
+ "ė": 216,
297
+ "Ę": 217,
298
+ "ę": 218,
299
+ "Ě": 219,
300
+ "ě": 220,
301
+ "Ĝ": 221,
302
+ "ĝ": 222,
303
+ "Ğ": 223,
304
+ "ğ": 224,
305
+ "Ġ": 225,
306
+ "ġ": 226,
307
+ "Ģ": 227,
308
+ "ģ": 228,
309
+ "Ĥ": 229,
310
+ "ĥ": 230,
311
+ "Ħ": 231,
312
+ "ħ": 232,
313
+ "Ĩ": 233,
314
+ "ĩ": 234,
315
+ "Ī": 235,
316
+ "ī": 236,
317
+ "Ĭ": 237,
318
+ "ĭ": 238,
319
+ "Į": 239,
320
+ "į": 240,
321
+ "İ": 241,
322
+ "ı": 242,
323
+ "IJ": 243,
324
+ "ij": 244,
325
+ "Ĵ": 245,
326
+ "ĵ": 246,
327
+ "Ķ": 247,
328
+ "ķ": 248,
329
+ "ĸ": 249,
330
+ "Ĺ": 250,
331
+ "ĺ": 251,
332
+ "Ļ": 252,
333
+ "ļ": 253,
334
+ "Ľ": 254,
335
+ "ľ": 255,
336
+ "Ŀ": 256,
337
+ "ŀ": 257,
338
+ "Ł": 258,
339
+ "ł": 259,
340
+ "Ń": 260,
341
+ "Se": 261,
342
+ "ce": 262,
343
+ "en": 263,
344
+ "qu": 264,
345
+ "Sequ": 265,
346
+ "ence": 266,
347
+ "Sequence": 267,
348
+ "RR": 268,
349
+ "LL": 269,
350
+ "KK": 270,
351
+ "KR": 271,
352
+ "LA": 272,
353
+ "LV": 273,
354
+ "AA": 274,
355
+ "GR": 275,
356
+ "SS": 276,
357
+ "KV": 277,
358
+ "LI": 278,
359
+ "LS": 279,
360
+ "LG": 280,
361
+ "KI": 281,
362
+ "MA": 282,
363
+ "TV": 283,
364
+ "KA": 284,
365
+ "LF": 285,
366
+ "EE": 286,
367
+ "SV": 287,
368
+ "LR": 288,
369
+ "KT": 289,
370
+ "SG": 290,
371
+ "LT": 291,
372
+ "VV": 292,
373
+ "KN": 293,
374
+ "KG": 294,
375
+ "KE": 295,
376
+ "KS": 296,
377
+ "II": 297,
378
+ "GG": 298,
379
+ "FF": 299,
380
+ "VI": 300,
381
+ "KC": 301,
382
+ "LN": 302,
383
+ "TA": 303,
384
+ "LP": 304,
385
+ "KQ": 305,
386
+ "LE": 306,
387
+ "TT": 307,
388
+ "CG": 308,
389
+ "RA": 309,
390
+ "KP": 310,
391
+ "SR": 311,
392
+ "IA": 312,
393
+ "SA": 313,
394
+ "TG": 314,
395
+ "VA": 315,
396
+ "FG": 316,
397
+ "SN": 317,
398
+ "DP": 318,
399
+ "EA": 319,
400
+ "CP": 320,
401
+ "LQ": 321,
402
+ "IG": 322,
403
+ "RG": 323,
404
+ "DG": 324,
405
+ "SC": 325,
406
+ "SP": 326,
407
+ "TP": 327,
408
+ "SI": 328,
409
+ "DA": 329,
410
+ "RV": 330,
411
+ "NN": 331,
412
+ "SF": 332,
413
+ "RE": 333,
414
+ "LY": 334,
415
+ "RQ": 335,
416
+ "KH": 336,
417
+ "ST": 337,
418
+ "VP": 338,
419
+ "IV": 339,
420
+ "SH": 340,
421
+ "IN": 341,
422
+ "DE": 342,
423
+ "AG": 343,
424
+ "IP": 344,
425
+ "KY": 345,
426
+ "CC": 346,
427
+ "KF": 347,
428
+ "QQ": 348,
429
+ "IF": 349,
430
+ "NP": 350,
431
+ "KD": 351,
432
+ "LD": 352,
433
+ "SD": 353,
434
+ "SE": 354,
435
+ "QP": 355,
436
+ "VE": 356,
437
+ "IC": 357,
438
+ "GF": 358,
439
+ "TR": 359,
440
+ "LC": 360,
441
+ "NA": 361,
442
+ "VG": 362,
443
+ "MP": 363,
444
+ "TF": 364,
445
+ "SQ": 365,
446
+ "TI": 366,
447
+ "ME": 367,
448
+ "VF": 368,
449
+ "SY": 369,
450
+ "NG": 370,
451
+ "VR": 371,
452
+ "IE": 372,
453
+ "DD": 373,
454
+ "TY": 374,
455
+ "TE": 375,
456
+ "LH": 376,
457
+ "RP": 377,
458
+ "RN": 378,
459
+ "NE": 379,
460
+ "YG": 380,
461
+ "TC": 381,
462
+ "MG": 382,
463
+ "TQ": 383,
464
+ "VC": 384,
465
+ "RI": 385,
466
+ "AV": 386,
467
+ "MV": 387,
468
+ "AI": 388,
469
+ "TH": 389,
470
+ "RH": 390,
471
+ "LW": 391,
472
+ "QE": 392,
473
+ "FY": 393,
474
+ "LKK": 394,
475
+ "FV": 395,
476
+ "MR": 396,
477
+ "PA": 397,
478
+ "MQ": 398,
479
+ "TS": 399,
480
+ "PP": 400,
481
+ "KM": 401,
482
+ "NQ": 402,
483
+ "LM": 403,
484
+ "RC": 404,
485
+ "KGR": 405,
486
+ "YY": 406,
487
+ "EG": 407,
488
+ "DV": 408,
489
+ "YP": 409,
490
+ "TN": 410,
491
+ "DC": 411,
492
+ "FA": 412,
493
+ "QA": 413,
494
+ "YA": 414,
495
+ "HH": 415,
496
+ "RRR": 416,
497
+ "IT": 417,
498
+ "LK": 418,
499
+ "RF": 419,
500
+ "PG": 420,
501
+ "HG": 421,
502
+ "IY": 422,
503
+ "MKV": 423,
504
+ "NV": 424,
505
+ "MD": 425,
506
+ "CE": 426,
507
+ "CA": 427,
508
+ "MF": 428,
509
+ "TD": 429,
510
+ "QG": 430,
511
+ "KRR": 431,
512
+ "MN": 432,
513
+ "LRR": 433,
514
+ "IR": 434,
515
+ "GA": 435,
516
+ "DR": 436,
517
+ "IQ": 437,
518
+ "NR": 438,
519
+ "VY": 439,
520
+ "FP": 440,
521
+ "SW": 441,
522
+ "DQ": 442,
523
+ "SK": 443,
524
+ "RRRR": 444,
525
+ "WG": 445,
526
+ "RQG": 446,
527
+ "HR": 447,
528
+ "FC": 448,
529
+ "PV": 449,
530
+ "ED": 450,
531
+ "MKR": 451,
532
+ "YC": 452,
533
+ "QV": 453,
534
+ "NI": 454,
535
+ "MI": 455,
536
+ "EV": 456,
537
+ "NC": 457,
538
+ "TK": 458,
539
+ "YR": 459,
540
+ "GV": 460,
541
+ "GE": 461,
542
+ "MT": 462,
543
+ "DF": 463,
544
+ "YV": 464,
545
+ "QF": 465,
546
+ "DI": 466,
547
+ "LAV": 467,
548
+ "KIK": 468,
549
+ "GC": 469,
550
+ "GT": 470,
551
+ "HA": 471,
552
+ "MS": 472,
553
+ "EK": 473,
554
+ "DN": 474,
555
+ "AR": 475,
556
+ "CR": 476,
557
+ "KQRQG": 477,
558
+ "FR": 478,
559
+ "PE": 479,
560
+ "LLV": 480,
561
+ "GI": 481,
562
+ "QN": 482,
563
+ "AE": 483,
564
+ "FLL": 484,
565
+ "QR": 485,
566
+ "CY": 486,
567
+ "AP": 487,
568
+ "IS": 488,
569
+ "VIC": 489,
570
+ "LKR": 490,
571
+ "SRR": 491,
572
+ "KTV": 492,
573
+ "MY": 493,
574
+ "NY": 494,
575
+ "HP": 495,
576
+ "KW": 496,
577
+ "DY": 497,
578
+ "FI": 498,
579
+ "LLE": 499,
580
+ "AC": 500,
581
+ "ND": 501,
582
+ "NH": 502,
583
+ "NF": 503,
584
+ "LSA": 504,
585
+ "QI": 505,
586
+ "SL": 506,
587
+ "LAG": 507,
588
+ "SM": 508,
589
+ "VW": 509,
590
+ "SKR": 510,
591
+ "SLL": 511,
592
+ "VQ": 512,
593
+ "KAA": 513,
594
+ "RW": 514,
595
+ "LAA": 515,
596
+ "GM": 516,
597
+ "KL": 517,
598
+ "RAR": 518,
599
+ "FE": 519,
600
+ "RY": 520,
601
+ "TKN": 521,
602
+ "MAVP": 522,
603
+ "QC": 523,
604
+ "ID": 524,
605
+ "VD": 525,
606
+ "VM": 526,
607
+ "EP": 527,
608
+ "SLF": 528,
609
+ "GRP": 529,
610
+ "TLV": 530,
611
+ "MC": 531,
612
+ "RT": 532,
613
+ "YI": 533,
614
+ "LSS": 534,
615
+ "LLG": 535,
616
+ "LKA": 536,
617
+ "EI": 537,
618
+ "MH": 538,
619
+ "AAA": 539,
620
+ "TW": 540,
621
+ "EQ": 541,
622
+ "MKK": 542,
623
+ "SLS": 543,
624
+ "ER": 544,
625
+ "CF": 545,
626
+ "HV": 546,
627
+ "KKN": 547,
628
+ "GRKV": 548,
629
+ "YN": 549,
630
+ "RD": 550,
631
+ "CN": 551,
632
+ "SLA": 552,
633
+ "YE": 553,
634
+ "LLA": 554,
635
+ "TYQP": 555,
636
+ "VT": 556,
637
+ "FN": 557,
638
+ "LTV": 558,
639
+ "HE": 559,
640
+ "GP": 560,
641
+ "KSS": 561,
642
+ "CGE": 562,
643
+ "WA": 563,
644
+ "FQ": 564,
645
+ "SNP": 565,
646
+ "SRSR": 566,
647
+ "RKR": 567,
648
+ "TFQP": 568,
649
+ "HI": 569,
650
+ "HGF": 570,
651
+ "SLI": 571,
652
+ "LKT": 572,
653
+ "QD": 573,
654
+ "ET": 574,
655
+ "KVI": 575,
656
+ "KKA": 576,
657
+ "LIP": 577,
658
+ "AAV": 578,
659
+ "PR": 579,
660
+ "EN": 580,
661
+ "RRA": 581,
662
+ "YF": 582,
663
+ "GQ": 583,
664
+ "VN": 584,
665
+ "CQ": 585,
666
+ "KEA": 586,
667
+ "ES": 587,
668
+ "RASV": 588,
669
+ "GY": 589,
670
+ "ILV": 590,
671
+ "SKS": 591,
672
+ "RGM": 592,
673
+ "NT": 593,
674
+ "RRG": 594,
675
+ "LNR": 595,
676
+ "KHKQRQG": 596,
677
+ "FLV": 597,
678
+ "NKK": 598,
679
+ "MKRTYQP": 599,
680
+ "KLS": 600,
681
+ "LKI": 601,
682
+ "RRRA": 602,
683
+ "FGF": 603,
684
+ "MAV": 604,
685
+ "EH": 605,
686
+ "MKI": 606,
687
+ "MKA": 607,
688
+ "STA": 608,
689
+ "LGF": 609,
690
+ "VVR": 610,
691
+ "RRN": 611,
692
+ "AQ": 612,
693
+ "MW": 613,
694
+ "FFN": 614,
695
+ "IKK": 615,
696
+ "KLL": 616,
697
+ "SLV": 617,
698
+ "LFV": 618,
699
+ "MLT": 619,
700
+ "RLI": 620,
701
+ "KLA": 621,
702
+ "IW": 622,
703
+ "GD": 623,
704
+ "KLP": 624,
705
+ "DTP": 625,
706
+ "FT": 626,
707
+ "HC": 627,
708
+ "HN": 628,
709
+ "RRSH": 629,
710
+ "FD": 630,
711
+ "SKT": 631,
712
+ "SGE": 632,
713
+ "PT": 633,
714
+ "LLLI": 634,
715
+ "KLR": 635,
716
+ "LCG": 636,
717
+ "CT": 637,
718
+ "RM": 638,
719
+ "HY": 639,
720
+ "AD": 640,
721
+ "HFY": 641,
722
+ "EC": 642,
723
+ "RARMA": 643,
724
+ "PI": 644,
725
+ "KII": 645,
726
+ "LTA": 646,
727
+ "SKK": 647,
728
+ "DT": 648,
729
+ "VVF": 649,
730
+ "LEE": 650,
731
+ "FEE": 651,
732
+ "LKE": 652,
733
+ "KCN": 653,
734
+ "AAG": 654,
735
+ "LKN": 655,
736
+ "KIV": 656,
737
+ "LNRTS": 657,
738
+ "PC": 658,
739
+ "MAKK": 659,
740
+ "EF": 660,
741
+ "HQ": 661,
742
+ "LLI": 662,
743
+ "KLT": 663,
744
+ "LVA": 664,
745
+ "PD": 665,
746
+ "MTQ": 666,
747
+ "YT": 667,
748
+ "RRH": 668,
749
+ "MKRTFQP": 669,
750
+ "FKR": 670,
751
+ "NLR": 671,
752
+ "LVG": 672,
753
+ "MLI": 673,
754
+ "LKV": 674,
755
+ "KNKR": 675,
756
+ "FDP": 676,
757
+ "MAVQQ": 677,
758
+ "HF": 678,
759
+ "NW": 679,
760
+ "HFYTT": 680,
761
+ "YTR": 681,
762
+ "CSC": 682,
763
+ "YS": 683,
764
+ "LNRTSLY": 684,
765
+ "LIG": 685,
766
+ "LSN": 686,
767
+ "LKS": 687,
768
+ "AT": 688,
769
+ "QT": 689,
770
+ "LAC": 690,
771
+ "AF": 691,
772
+ "SKI": 692,
773
+ "KLE": 693,
774
+ "QH": 694,
775
+ "YD": 695,
776
+ "GGG": 696,
777
+ "NNNN": 697,
778
+ "MEA": 698,
779
+ "SLR": 699,
780
+ "YFFN": 700,
781
+ "KSE": 701,
782
+ "LKG": 702,
783
+ "KKR": 703,
784
+ "RHKQRQG": 704,
785
+ "MTI": 705,
786
+ "VK": 706,
787
+ "WQ": 707,
788
+ "HD": 708,
789
+ "SSG": 709,
790
+ "MRV": 710,
791
+ "MSD": 711,
792
+ "VRR": 712,
793
+ "LLF": 713,
794
+ "KSV": 714,
795
+ "KRF": 715,
796
+ "MKVRASV": 716,
797
+ "TAFG": 717,
798
+ "NLL": 718,
799
+ "QY": 719,
800
+ "RIC": 720,
801
+ "LGV": 721,
802
+ "LFF": 722,
803
+ "WV": 723,
804
+ "TVE": 724,
805
+ "IQR": 725,
806
+ "HSV": 726,
807
+ "MM": 727,
808
+ "LSV": 728,
809
+ "EEE": 729,
810
+ "AY": 730,
811
+ "LFG": 731,
812
+ "LRDP": 732,
813
+ "KPSV": 733,
814
+ "MQF": 734,
815
+ "WGLLLI": 735,
816
+ "RGMIN": 736,
817
+ "WGLLLIFV": 737,
818
+ "PN": 738,
819
+ "SEA": 739,
820
+ "KRT": 740,
821
+ "LAVLF": 741,
822
+ "KLN": 742,
823
+ "LGLRR": 743,
824
+ "KLV": 744,
825
+ "WF": 745,
826
+ "LLLL": 746,
827
+ "HT": 747,
828
+ "TGHFYTT": 748,
829
+ "RNC": 749,
830
+ "THGF": 750,
831
+ "IFTV": 751,
832
+ "LKQ": 752,
833
+ "YTF": 753,
834
+ "KCA": 754,
835
+ "RRRAKGR": 755,
836
+ "CI": 756,
837
+ "LSH": 757,
838
+ "SLG": 758,
839
+ "LAY": 759,
840
+ "LRHH": 760,
841
+ "GW": 761,
842
+ "MLL": 762,
843
+ "SAA": 763,
844
+ "RVC": 764,
845
+ "NLQ": 765,
846
+ "KGA": 766,
847
+ "FFV": 767,
848
+ "LRDPFEE": 768,
849
+ "NKT": 769,
850
+ "WGLLLIFVLAVLF": 770,
851
+ "MQFIQR": 771,
852
+ "LSG": 772,
853
+ "META": 773,
854
+ "SKRGM": 774,
855
+ "CV": 775,
856
+ "SQQ": 776,
857
+ "VTV": 777,
858
+ "YQ": 778,
859
+ "IFTVRW": 779,
860
+ "HS": 780,
861
+ "IGR": 781,
862
+ "MKT": 782,
863
+ "IK": 783,
864
+ "PF": 784,
865
+ "ICE": 785,
866
+ "GRR": 786,
867
+ "KLC": 787,
868
+ "SLT": 788,
869
+ "RPSV": 789,
870
+ "IM": 790,
871
+ "LAVP": 791,
872
+ "VELNRTSLY": 792,
873
+ "DH": 793,
874
+ "WAA": 794,
875
+ "MAKG": 795,
876
+ "AN": 796,
877
+ "KAT": 797,
878
+ "KEV": 798,
879
+ "DALT": 799,
880
+ "SFTG": 800,
881
+ "WP": 801,
882
+ "LIC": 802,
883
+ "IVLG": 803,
884
+ "MKVRPSV": 804,
885
+ "KEAKIK": 805,
886
+ "AH": 806,
887
+ "GN": 807,
888
+ "KRKR": 808,
889
+ "MDP": 809,
890
+ "LYTAFG": 810,
891
+ "MIE": 811,
892
+ "RKH": 812,
893
+ "LAF": 813,
894
+ "MAD": 814,
895
+ "CD": 815,
896
+ "TM": 816,
897
+ "LKKY": 817,
898
+ "SFTGYA": 818,
899
+ "GRIP": 819,
900
+ "LGI": 820,
901
+ "KFG": 821,
902
+ "LRRFKR": 822,
903
+ "HKE": 823,
904
+ "TLH": 824,
905
+ "LVR": 825,
906
+ "KTI": 826,
907
+ "KEKE": 827,
908
+ "SNYFFN": 828,
909
+ "SKN": 829,
910
+ "YLQ": 830,
911
+ "SFTGYALYTAFG": 831,
912
+ "IIA": 832,
913
+ "FFLG": 833,
914
+ "MST": 834,
915
+ "PQ": 835,
916
+ "PIFTVRW": 836,
917
+ "LIA": 837,
918
+ "VIA": 838,
919
+ "SFTGYALYTAFGQP": 839,
920
+ "DKC": 840,
921
+ "IH": 841,
922
+ "QW": 842,
923
+ "MAR": 843,
924
+ "SHA": 844,
925
+ "INR": 845,
926
+ "KRE": 846,
927
+ "KVEE": 847,
928
+ "LAI": 848,
929
+ "TTG": 849,
930
+ "CS": 850,
931
+ "LKC": 851,
932
+ "SKV": 852,
933
+ "YLS": 853,
934
+ "KKE": 854,
935
+ "LAT": 855,
936
+ "TVV": 856,
937
+ "IIFF": 857,
938
+ "WGLLLIFVLAVLFSNYFFN": 858,
939
+ "LAVPTV": 859,
940
+ "DW": 860,
941
+ "LVF": 861,
942
+ "GRA": 862,
943
+ "VVG": 863,
944
+ "SNPNE": 864,
945
+ "KEE": 865,
946
+ "RKI": 866,
947
+ "YW": 867,
948
+ "LQQ": 868,
949
+ "DGIP": 869,
950
+ "MAA": 870,
951
+ "TKR": 871,
952
+ "LRE": 872,
953
+ "IGV": 873,
954
+ "FRE": 874,
955
+ "SLE": 875,
956
+ "KKV": 876,
957
+ "KQG": 877,
958
+ "RRAKGR": 878,
959
+ "PW": 879,
960
+ "QKA": 880,
961
+ "GRQV": 881,
962
+ "MTQSNPNE": 882,
963
+ "DIA": 883,
964
+ "SSAG": 884,
965
+ "LPP": 885,
966
+ "TGQ": 886,
967
+ "METATLV": 887,
968
+ "DM": 888,
969
+ "LLP": 889,
970
+ "SSL": 890,
971
+ "SAMQFIQR": 891,
972
+ "RRRY": 892,
973
+ "MEALV": 893,
974
+ "LVP": 894,
975
+ "TYPIFTVRW": 895,
976
+ "AIFF": 896,
977
+ "FW": 897,
978
+ "LGR": 898,
979
+ "MSE": 899,
980
+ "MK": 900,
981
+ "SCG": 901,
982
+ "YRRG": 902,
983
+ "FKK": 903,
984
+ "SKA": 904,
985
+ "SISAMQFIQR": 905,
986
+ "CW": 906,
987
+ "NKN": 907,
988
+ "TLT": 908,
989
+ "SGLG": 909,
990
+ "KGQ": 910,
991
+ "KSA": 911,
992
+ "HGD": 912,
993
+ "MKVRASVKK": 913,
994
+ "MEALVYTF": 914,
995
+ "TKA": 915,
996
+ "FGKC": 916,
997
+ "NAP": 917,
998
+ "HGLAVPTV": 918,
999
+ "RSR": 919,
1000
+ "TKK": 920,
1001
+ "SGA": 921,
1002
+ "MEV": 922,
1003
+ "RRC": 923,
1004
+ "LARRRAKGR": 924,
1005
+ "SVEE": 925,
1006
+ "IAG": 926,
1007
+ "EY": 927,
1008
+ "LRA": 928,
1009
+ "SNGT": 929,
1010
+ "NAVV": 930,
1011
+ "LSNDP": 931,
1012
+ "SLP": 932,
1013
+ "GRPHSV": 933,
1014
+ "TGHFYTTTKN": 934,
1015
+ "SKE": 935,
1016
+ "KGP": 936,
1017
+ "REPP": 937,
1018
+ "SIA": 938,
1019
+ "VH": 939,
1020
+ "KRKG": 940,
1021
+ "LGIIFF": 941,
1022
+ "EEEE": 942,
1023
+ "LKSG": 943,
1024
+ "LKD": 944,
1025
+ "PRR": 945,
1026
+ "KIR": 946,
1027
+ "YGSY": 947,
1028
+ "FGFLSNDP": 948,
1029
+ "KSEAA": 949,
1030
+ "LGG": 950,
1031
+ "NPKHKQRQG": 951,
1032
+ "YPKP": 952,
1033
+ "MTIDR": 953,
1034
+ "MLF": 954,
1035
+ "GRIPLW": 955,
1036
+ "AIFFREPP": 956,
1037
+ "SNGTLT": 957,
1038
+ "LLIE": 958,
1039
+ "KVE": 959,
1040
+ "LPKH": 960,
1041
+ "MTIDRTYPIFTVRW": 961,
1042
+ "KVCY": 962,
1043
+ "VIG": 963,
1044
+ "SHLV": 964,
1045
+ "INA": 965,
1046
+ "LCGSD": 966,
1047
+ "ILL": 967,
1048
+ "NK": 968,
1049
+ "SEE": 969,
1050
+ "LNP": 970,
1051
+ "KYG": 971,
1052
+ "KYDP": 972,
1053
+ "LRDPFEEHGD": 973,
1054
+ "MKVRPSVKP": 974,
1055
+ "WR": 975,
1056
+ "LVV": 976,
1057
+ "KIG": 977,
1058
+ "MPKM": 978,
1059
+ "LLVSFTGYALYTAFGQP": 979,
1060
+ "QNVELNRTSLY": 980,
1061
+ "IGH": 981,
1062
+ "IFI": 982,
1063
+ "MTQSNPNEQNVELNRTSLY": 983,
1064
+ "CKC": 984,
1065
+ "FH": 985,
1066
+ "KKI": 986,
1067
+ "IPG": 987,
1068
+ "RRRGR": 988,
1069
+ "VVRVIC": 989,
1070
+ "RARMATKN": 990,
1071
+ "MAVQQNKK": 991,
1072
+ "SQQLRDPFEEHGD": 992,
1073
+ "DS": 993,
1074
+ "KDG": 994,
1075
+ "WE": 995,
1076
+ "KVQ": 996,
1077
+ "FFP": 997,
1078
+ "CPN": 998,
1079
+ "KIKLV": 999,
1080
+ "RKN": 1000,
1081
+ "TGR": 1001,
1082
+ "TLI": 1002,
1083
+ "AAI": 1003,
1084
+ "LSE": 1004,
1085
+ "TADG": 1005,
1086
+ "FCNAVV": 1006,
1087
+ "LAVHGLAVPTV": 1007,
1088
+ "YLQYRRG": 1008,
1089
+ "LGIIFFAIFFREPP": 1009,
1090
+ "CH": 1010,
1091
+ "PH": 1011,
1092
+ "TLL": 1012,
1093
+ "KKKK": 1013,
1094
+ "AAE": 1014,
1095
+ "LGLR": 1015,
1096
+ "KAG": 1016,
1097
+ "IRE": 1017,
1098
+ "SKG": 1018,
1099
+ "SFF": 1019,
1100
+ "TSS": 1020,
1101
+ "TLE": 1021,
1102
+ "VKK": 1022,
1103
+ "VICE": 1023
1104
+ },
1105
+ "merges": [
1106
+ "S e",
1107
+ "c e",
1108
+ "e n",
1109
+ "q u",
1110
+ "Se qu",
1111
+ "en ce",
1112
+ "Sequ ence",
1113
+ "R R",
1114
+ "L L",
1115
+ "K K",
1116
+ "K R",
1117
+ "L A",
1118
+ "L V",
1119
+ "A A",
1120
+ "G R",
1121
+ "S S",
1122
+ "K V",
1123
+ "L I",
1124
+ "L S",
1125
+ "L G",
1126
+ "K I",
1127
+ "M A",
1128
+ "T V",
1129
+ "K A",
1130
+ "L F",
1131
+ "E E",
1132
+ "S V",
1133
+ "L R",
1134
+ "K T",
1135
+ "S G",
1136
+ "L T",
1137
+ "V V",
1138
+ "K N",
1139
+ "K G",
1140
+ "K E",
1141
+ "K S",
1142
+ "I I",
1143
+ "G G",
1144
+ "F F",
1145
+ "V I",
1146
+ "K C",
1147
+ "L N",
1148
+ "T A",
1149
+ "L P",
1150
+ "K Q",
1151
+ "L E",
1152
+ "T T",
1153
+ "C G",
1154
+ "R A",
1155
+ "K P",
1156
+ "S R",
1157
+ "I A",
1158
+ "S A",
1159
+ "T G",
1160
+ "V A",
1161
+ "F G",
1162
+ "S N",
1163
+ "D P",
1164
+ "E A",
1165
+ "C P",
1166
+ "L Q",
1167
+ "I G",
1168
+ "R G",
1169
+ "D G",
1170
+ "S C",
1171
+ "S P",
1172
+ "T P",
1173
+ "S I",
1174
+ "D A",
1175
+ "R V",
1176
+ "N N",
1177
+ "S F",
1178
+ "R E",
1179
+ "L Y",
1180
+ "R Q",
1181
+ "K H",
1182
+ "S T",
1183
+ "V P",
1184
+ "I V",
1185
+ "S H",
1186
+ "I N",
1187
+ "D E",
1188
+ "A G",
1189
+ "I P",
1190
+ "K Y",
1191
+ "C C",
1192
+ "K F",
1193
+ "Q Q",
1194
+ "I F",
1195
+ "N P",
1196
+ "K D",
1197
+ "L D",
1198
+ "S D",
1199
+ "S E",
1200
+ "Q P",
1201
+ "V E",
1202
+ "I C",
1203
+ "G F",
1204
+ "T R",
1205
+ "L C",
1206
+ "N A",
1207
+ "V G",
1208
+ "M P",
1209
+ "T F",
1210
+ "S Q",
1211
+ "T I",
1212
+ "M E",
1213
+ "V F",
1214
+ "S Y",
1215
+ "N G",
1216
+ "V R",
1217
+ "I E",
1218
+ "D D",
1219
+ "T Y",
1220
+ "T E",
1221
+ "L H",
1222
+ "R P",
1223
+ "R N",
1224
+ "N E",
1225
+ "Y G",
1226
+ "T C",
1227
+ "M G",
1228
+ "T Q",
1229
+ "V C",
1230
+ "R I",
1231
+ "A V",
1232
+ "M V",
1233
+ "A I",
1234
+ "T H",
1235
+ "R H",
1236
+ "L W",
1237
+ "Q E",
1238
+ "F Y",
1239
+ "L KK",
1240
+ "F V",
1241
+ "M R",
1242
+ "P A",
1243
+ "M Q",
1244
+ "T S",
1245
+ "P P",
1246
+ "K M",
1247
+ "N Q",
1248
+ "L M",
1249
+ "R C",
1250
+ "K GR",
1251
+ "Y Y",
1252
+ "E G",
1253
+ "D V",
1254
+ "Y P",
1255
+ "T N",
1256
+ "D C",
1257
+ "F A",
1258
+ "Q A",
1259
+ "Y A",
1260
+ "H H",
1261
+ "RR R",
1262
+ "I T",
1263
+ "L K",
1264
+ "R F",
1265
+ "P G",
1266
+ "H G",
1267
+ "I Y",
1268
+ "M KV",
1269
+ "N V",
1270
+ "M D",
1271
+ "C E",
1272
+ "C A",
1273
+ "M F",
1274
+ "T D",
1275
+ "Q G",
1276
+ "K RR",
1277
+ "M N",
1278
+ "L RR",
1279
+ "I R",
1280
+ "G A",
1281
+ "D R",
1282
+ "I Q",
1283
+ "N R",
1284
+ "V Y",
1285
+ "F P",
1286
+ "S W",
1287
+ "D Q",
1288
+ "S K",
1289
+ "RR RR",
1290
+ "W G",
1291
+ "RQ G",
1292
+ "H R",
1293
+ "F C",
1294
+ "P V",
1295
+ "E D",
1296
+ "M KR",
1297
+ "Y C",
1298
+ "Q V",
1299
+ "N I",
1300
+ "M I",
1301
+ "E V",
1302
+ "N C",
1303
+ "T K",
1304
+ "Y R",
1305
+ "G V",
1306
+ "G E",
1307
+ "M T",
1308
+ "D F",
1309
+ "Y V",
1310
+ "Q F",
1311
+ "D I",
1312
+ "LA V",
1313
+ "KI K",
1314
+ "G C",
1315
+ "G T",
1316
+ "H A",
1317
+ "M S",
1318
+ "E K",
1319
+ "D N",
1320
+ "A R",
1321
+ "C R",
1322
+ "KQ RQG",
1323
+ "F R",
1324
+ "P E",
1325
+ "LL V",
1326
+ "G I",
1327
+ "Q N",
1328
+ "A E",
1329
+ "F LL",
1330
+ "Q R",
1331
+ "C Y",
1332
+ "A P",
1333
+ "I S",
1334
+ "VI C",
1335
+ "L KR",
1336
+ "S RR",
1337
+ "K TV",
1338
+ "M Y",
1339
+ "N Y",
1340
+ "H P",
1341
+ "K W",
1342
+ "D Y",
1343
+ "F I",
1344
+ "LL E",
1345
+ "A C",
1346
+ "N D",
1347
+ "N H",
1348
+ "N F",
1349
+ "LS A",
1350
+ "Q I",
1351
+ "S L",
1352
+ "LA G",
1353
+ "S M",
1354
+ "V W",
1355
+ "S KR",
1356
+ "S LL",
1357
+ "V Q",
1358
+ "K AA",
1359
+ "R W",
1360
+ "LA A",
1361
+ "G M",
1362
+ "K L",
1363
+ "RA R",
1364
+ "F E",
1365
+ "R Y",
1366
+ "T KN",
1367
+ "MA VP",
1368
+ "Q C",
1369
+ "I D",
1370
+ "V D",
1371
+ "V M",
1372
+ "E P",
1373
+ "S LF",
1374
+ "GR P",
1375
+ "T LV",
1376
+ "M C",
1377
+ "R T",
1378
+ "Y I",
1379
+ "L SS",
1380
+ "LL G",
1381
+ "L KA",
1382
+ "E I",
1383
+ "M H",
1384
+ "AA A",
1385
+ "T W",
1386
+ "E Q",
1387
+ "M KK",
1388
+ "S LS",
1389
+ "E R",
1390
+ "C F",
1391
+ "H V",
1392
+ "KK N",
1393
+ "GR KV",
1394
+ "Y N",
1395
+ "R D",
1396
+ "C N",
1397
+ "S LA",
1398
+ "Y E",
1399
+ "LL A",
1400
+ "TY QP",
1401
+ "V T",
1402
+ "F N",
1403
+ "L TV",
1404
+ "H E",
1405
+ "G P",
1406
+ "K SS",
1407
+ "CG E",
1408
+ "W A",
1409
+ "F Q",
1410
+ "SN P",
1411
+ "SR SR",
1412
+ "R KR",
1413
+ "TF QP",
1414
+ "H I",
1415
+ "H GF",
1416
+ "S LI",
1417
+ "L KT",
1418
+ "Q D",
1419
+ "E T",
1420
+ "KV I",
1421
+ "KK A",
1422
+ "LI P",
1423
+ "AA V",
1424
+ "P R",
1425
+ "E N",
1426
+ "RR A",
1427
+ "Y F",
1428
+ "G Q",
1429
+ "V N",
1430
+ "C Q",
1431
+ "KE A",
1432
+ "E S",
1433
+ "RA SV",
1434
+ "G Y",
1435
+ "I LV",
1436
+ "S KS",
1437
+ "RG M",
1438
+ "N T",
1439
+ "RR G",
1440
+ "LN R",
1441
+ "KH KQRQG",
1442
+ "F LV",
1443
+ "N KK",
1444
+ "MKR TYQP",
1445
+ "K LS",
1446
+ "L KI",
1447
+ "RR RA",
1448
+ "FG F",
1449
+ "MA V",
1450
+ "E H",
1451
+ "M KI",
1452
+ "M KA",
1453
+ "S TA",
1454
+ "LG F",
1455
+ "VV R",
1456
+ "RR N",
1457
+ "A Q",
1458
+ "M W",
1459
+ "FF N",
1460
+ "I KK",
1461
+ "K LL",
1462
+ "S LV",
1463
+ "LF V",
1464
+ "M LT",
1465
+ "R LI",
1466
+ "K LA",
1467
+ "I W",
1468
+ "G D",
1469
+ "K LP",
1470
+ "D TP",
1471
+ "F T",
1472
+ "H C",
1473
+ "H N",
1474
+ "RR SH",
1475
+ "F D",
1476
+ "S KT",
1477
+ "SG E",
1478
+ "P T",
1479
+ "LL LI",
1480
+ "K LR",
1481
+ "L CG",
1482
+ "C T",
1483
+ "R M",
1484
+ "H Y",
1485
+ "A D",
1486
+ "H FY",
1487
+ "E C",
1488
+ "RAR MA",
1489
+ "P I",
1490
+ "KI I",
1491
+ "LT A",
1492
+ "S KK",
1493
+ "D T",
1494
+ "VV F",
1495
+ "L EE",
1496
+ "F EE",
1497
+ "L KE",
1498
+ "KC N",
1499
+ "AA G",
1500
+ "L KN",
1501
+ "KI V",
1502
+ "LNR TS",
1503
+ "P C",
1504
+ "MA KK",
1505
+ "E F",
1506
+ "H Q",
1507
+ "LL I",
1508
+ "K LT",
1509
+ "LV A",
1510
+ "P D",
1511
+ "M TQ",
1512
+ "Y T",
1513
+ "RR H",
1514
+ "MKR TFQP",
1515
+ "F KR",
1516
+ "N LR",
1517
+ "LV G",
1518
+ "M LI",
1519
+ "L KV",
1520
+ "KN KR",
1521
+ "F DP",
1522
+ "MAV QQ",
1523
+ "H F",
1524
+ "N W",
1525
+ "HFY TT",
1526
+ "Y TR",
1527
+ "C SC",
1528
+ "Y S",
1529
+ "LNRTS LY",
1530
+ "LI G",
1531
+ "LS N",
1532
+ "L KS",
1533
+ "A T",
1534
+ "Q T",
1535
+ "LA C",
1536
+ "A F",
1537
+ "S KI",
1538
+ "K LE",
1539
+ "Q H",
1540
+ "Y D",
1541
+ "GG G",
1542
+ "NN NN",
1543
+ "M EA",
1544
+ "S LR",
1545
+ "Y FFN",
1546
+ "KS E",
1547
+ "L KG",
1548
+ "KK R",
1549
+ "RH KQRQG",
1550
+ "M TI",
1551
+ "V K",
1552
+ "W Q",
1553
+ "H D",
1554
+ "SS G",
1555
+ "M RV",
1556
+ "M SD",
1557
+ "V RR",
1558
+ "LL F",
1559
+ "K SV",
1560
+ "KR F",
1561
+ "MKV RASV",
1562
+ "TA FG",
1563
+ "N LL",
1564
+ "Q Y",
1565
+ "R IC",
1566
+ "LG V",
1567
+ "LF F",
1568
+ "W V",
1569
+ "TV E",
1570
+ "IQ R",
1571
+ "H SV",
1572
+ "M M",
1573
+ "LS V",
1574
+ "EE E",
1575
+ "A Y",
1576
+ "LF G",
1577
+ "LR DP",
1578
+ "KP SV",
1579
+ "MQ F",
1580
+ "WG LLLI",
1581
+ "RGM IN",
1582
+ "WGLLLI FV",
1583
+ "P N",
1584
+ "S EA",
1585
+ "KR T",
1586
+ "LAV LF",
1587
+ "K LN",
1588
+ "LG LRR",
1589
+ "K LV",
1590
+ "W F",
1591
+ "LL LL",
1592
+ "H T",
1593
+ "TG HFYTT",
1594
+ "RN C",
1595
+ "TH GF",
1596
+ "IF TV",
1597
+ "L KQ",
1598
+ "Y TF",
1599
+ "KC A",
1600
+ "RRRA KGR",
1601
+ "C I",
1602
+ "LS H",
1603
+ "S LG",
1604
+ "LA Y",
1605
+ "LR HH",
1606
+ "G W",
1607
+ "M LL",
1608
+ "S AA",
1609
+ "RV C",
1610
+ "N LQ",
1611
+ "KG A",
1612
+ "FF V",
1613
+ "LRDP FEE",
1614
+ "N KT",
1615
+ "WGLLLIFV LAVLF",
1616
+ "MQF IQR",
1617
+ "LS G",
1618
+ "ME TA",
1619
+ "SKR GM",
1620
+ "C V",
1621
+ "S QQ",
1622
+ "V TV",
1623
+ "Y Q",
1624
+ "IFTV RW",
1625
+ "H S",
1626
+ "I GR",
1627
+ "M KT",
1628
+ "I K",
1629
+ "P F",
1630
+ "IC E",
1631
+ "G RR",
1632
+ "K LC",
1633
+ "S LT",
1634
+ "RP SV",
1635
+ "I M",
1636
+ "LA VP",
1637
+ "VE LNRTSLY",
1638
+ "D H",
1639
+ "W AA",
1640
+ "MA KG",
1641
+ "A N",
1642
+ "KA T",
1643
+ "KE V",
1644
+ "DA LT",
1645
+ "SF TG",
1646
+ "W P",
1647
+ "LI C",
1648
+ "IV LG",
1649
+ "MKV RPSV",
1650
+ "KEA KIK",
1651
+ "A H",
1652
+ "G N",
1653
+ "KR KR",
1654
+ "M DP",
1655
+ "LY TAFG",
1656
+ "M IE",
1657
+ "R KH",
1658
+ "LA F",
1659
+ "MA D",
1660
+ "C D",
1661
+ "T M",
1662
+ "LKK Y",
1663
+ "SFTG YA",
1664
+ "GR IP",
1665
+ "LG I",
1666
+ "K FG",
1667
+ "LRR FKR",
1668
+ "H KE",
1669
+ "T LH",
1670
+ "LV R",
1671
+ "KT I",
1672
+ "KE KE",
1673
+ "SN YFFN",
1674
+ "S KN",
1675
+ "Y LQ",
1676
+ "SFTGYA LYTAFG",
1677
+ "II A",
1678
+ "FF LG",
1679
+ "M ST",
1680
+ "P Q",
1681
+ "P IFTVRW",
1682
+ "LI A",
1683
+ "VI A",
1684
+ "SFTGYALYTAFG QP",
1685
+ "D KC",
1686
+ "I H",
1687
+ "Q W",
1688
+ "MA R",
1689
+ "SH A",
1690
+ "IN R",
1691
+ "KR E",
1692
+ "KV EE",
1693
+ "LA I",
1694
+ "TT G",
1695
+ "C S",
1696
+ "L KC",
1697
+ "S KV",
1698
+ "Y LS",
1699
+ "KK E",
1700
+ "LA T",
1701
+ "TV V",
1702
+ "II FF",
1703
+ "WGLLLIFVLAVLF SNYFFN",
1704
+ "LAVP TV",
1705
+ "D W",
1706
+ "LV F",
1707
+ "GR A",
1708
+ "VV G",
1709
+ "SNP NE",
1710
+ "K EE",
1711
+ "R KI",
1712
+ "Y W",
1713
+ "LQ Q",
1714
+ "DG IP",
1715
+ "M AA",
1716
+ "T KR",
1717
+ "LR E",
1718
+ "IG V",
1719
+ "F RE",
1720
+ "S LE",
1721
+ "KK V",
1722
+ "KQ G",
1723
+ "RRA KGR",
1724
+ "P W",
1725
+ "Q KA",
1726
+ "GR QV",
1727
+ "MTQ SNPNE",
1728
+ "D IA",
1729
+ "SS AG",
1730
+ "LP P",
1731
+ "TG Q",
1732
+ "META TLV",
1733
+ "D M",
1734
+ "LL P",
1735
+ "SS L",
1736
+ "SA MQFIQR",
1737
+ "RRR Y",
1738
+ "MEA LV",
1739
+ "LV P",
1740
+ "TY PIFTVRW",
1741
+ "AI FF",
1742
+ "F W",
1743
+ "L GR",
1744
+ "M SE",
1745
+ "M K",
1746
+ "S CG",
1747
+ "Y RRG",
1748
+ "F KK",
1749
+ "S KA",
1750
+ "SI SAMQFIQR",
1751
+ "C W",
1752
+ "N KN",
1753
+ "T LT",
1754
+ "SG LG",
1755
+ "KG Q",
1756
+ "KS A",
1757
+ "HG D",
1758
+ "MKVRASV KK",
1759
+ "MEALV YTF",
1760
+ "T KA",
1761
+ "FG KC",
1762
+ "NA P",
1763
+ "HG LAVPTV",
1764
+ "R SR",
1765
+ "T KK",
1766
+ "SG A",
1767
+ "ME V",
1768
+ "RR C",
1769
+ "LA RRRAKGR",
1770
+ "SV EE",
1771
+ "IA G",
1772
+ "E Y",
1773
+ "LR A",
1774
+ "SN GT",
1775
+ "NA VV",
1776
+ "LSN DP",
1777
+ "S LP",
1778
+ "GRP HSV",
1779
+ "TGHFYTT TKN",
1780
+ "S KE",
1781
+ "KG P",
1782
+ "RE PP",
1783
+ "S IA",
1784
+ "V H",
1785
+ "KR KG",
1786
+ "LG IIFF",
1787
+ "EE EE",
1788
+ "LK SG",
1789
+ "L KD",
1790
+ "P RR",
1791
+ "KI R",
1792
+ "YG SY",
1793
+ "FGF LSNDP",
1794
+ "KSE AA",
1795
+ "LG G",
1796
+ "NP KHKQRQG",
1797
+ "YP KP",
1798
+ "MTI DR",
1799
+ "M LF",
1800
+ "GRIP LW",
1801
+ "AIFF REPP",
1802
+ "SNGT LT",
1803
+ "LL IE",
1804
+ "KV E",
1805
+ "LP KH",
1806
+ "MTIDR TYPIFTVRW",
1807
+ "KV CY",
1808
+ "VI G",
1809
+ "SH LV",
1810
+ "IN A",
1811
+ "LCG SD",
1812
+ "I LL",
1813
+ "N K",
1814
+ "S EE",
1815
+ "LN P",
1816
+ "KY G",
1817
+ "KY DP",
1818
+ "LRDPFEE HGD",
1819
+ "MKVRPSV KP",
1820
+ "W R",
1821
+ "LV V",
1822
+ "KI G",
1823
+ "MP KM",
1824
+ "LLV SFTGYALYTAFGQP",
1825
+ "QN VELNRTSLY",
1826
+ "IG H",
1827
+ "IF I",
1828
+ "MTQSNPNE QNVELNRTSLY",
1829
+ "C KC",
1830
+ "F H",
1831
+ "KK I",
1832
+ "IP G",
1833
+ "RRR GR",
1834
+ "VVR VIC",
1835
+ "RARMA TKN",
1836
+ "MAVQQ NKK",
1837
+ "SQQ LRDPFEEHGD",
1838
+ "D S",
1839
+ "K DG",
1840
+ "W E",
1841
+ "KV Q",
1842
+ "FF P",
1843
+ "CP N",
1844
+ "KIK LV",
1845
+ "R KN",
1846
+ "T GR",
1847
+ "T LI",
1848
+ "AA I",
1849
+ "LS E",
1850
+ "TA DG",
1851
+ "FC NAVV",
1852
+ "LAV HGLAVPTV",
1853
+ "YLQ YRRG",
1854
+ "LGIIFF AIFFREPP",
1855
+ "C H",
1856
+ "P H",
1857
+ "T LL",
1858
+ "KK KK",
1859
+ "AA E",
1860
+ "LG LR",
1861
+ "KA G",
1862
+ "I RE",
1863
+ "S KG",
1864
+ "S FF",
1865
+ "T SS",
1866
+ "T LE",
1867
+ "V KK",
1868
+ "VI CE"
1869
+ ]
1870
+ }
1871
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "model_max_length": 1000000000000000019884624838656,
4
+ "tokenizer_class": "PreTrainedTokenizerFast"
5
+ }