QizhiPei commited on
Commit
4883dfd
1 Parent(s): aec2f30

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,110 +1,8 @@
1
  {
2
- ".": 5,
3
- "</s>": 1,
4
- "<bom>": 35071,
5
- "<bop>": 35066,
6
- "<eom>": 35067,
7
- "<eop>": 35063,
8
- "<extra_id_0>": 32099,
9
- "<extra_id_10>": 32089,
10
- "<extra_id_11>": 32088,
11
- "<extra_id_12>": 32087,
12
- "<extra_id_13>": 32086,
13
- "<extra_id_14>": 32085,
14
- "<extra_id_15>": 32084,
15
- "<extra_id_16>": 32083,
16
- "<extra_id_17>": 32082,
17
- "<extra_id_18>": 32081,
18
- "<extra_id_19>": 32080,
19
- "<extra_id_1>": 32098,
20
- "<extra_id_20>": 32079,
21
- "<extra_id_21>": 32078,
22
- "<extra_id_22>": 32077,
23
- "<extra_id_23>": 32076,
24
- "<extra_id_24>": 32075,
25
- "<extra_id_25>": 32074,
26
- "<extra_id_26>": 32073,
27
- "<extra_id_27>": 32072,
28
- "<extra_id_28>": 32071,
29
- "<extra_id_29>": 32070,
30
- "<extra_id_2>": 32097,
31
- "<extra_id_30>": 32069,
32
- "<extra_id_31>": 32068,
33
- "<extra_id_32>": 32067,
34
- "<extra_id_33>": 32066,
35
- "<extra_id_34>": 32065,
36
- "<extra_id_35>": 32064,
37
- "<extra_id_36>": 32063,
38
- "<extra_id_37>": 32062,
39
- "<extra_id_38>": 32061,
40
- "<extra_id_39>": 32060,
41
- "<extra_id_3>": 32096,
42
- "<extra_id_40>": 32059,
43
- "<extra_id_41>": 32058,
44
- "<extra_id_42>": 32057,
45
- "<extra_id_43>": 32056,
46
- "<extra_id_44>": 32055,
47
- "<extra_id_45>": 32054,
48
- "<extra_id_46>": 32053,
49
- "<extra_id_47>": 32052,
50
- "<extra_id_48>": 32051,
51
- "<extra_id_49>": 32050,
52
- "<extra_id_4>": 32095,
53
- "<extra_id_50>": 32049,
54
- "<extra_id_51>": 32048,
55
- "<extra_id_52>": 32047,
56
- "<extra_id_53>": 32046,
57
- "<extra_id_54>": 32045,
58
- "<extra_id_55>": 32044,
59
- "<extra_id_56>": 32043,
60
- "<extra_id_57>": 32042,
61
- "<extra_id_58>": 32041,
62
- "<extra_id_59>": 32040,
63
- "<extra_id_5>": 32094,
64
- "<extra_id_60>": 32039,
65
- "<extra_id_61>": 32038,
66
- "<extra_id_62>": 32037,
67
- "<extra_id_63>": 32036,
68
- "<extra_id_64>": 32035,
69
- "<extra_id_65>": 32034,
70
- "<extra_id_66>": 32033,
71
- "<extra_id_67>": 32032,
72
- "<extra_id_68>": 32031,
73
- "<extra_id_69>": 32030,
74
- "<extra_id_6>": 32093,
75
- "<extra_id_70>": 32029,
76
- "<extra_id_71>": 32028,
77
- "<extra_id_72>": 32027,
78
- "<extra_id_73>": 32026,
79
- "<extra_id_74>": 32025,
80
- "<extra_id_75>": 32024,
81
- "<extra_id_76>": 32023,
82
- "<extra_id_77>": 32022,
83
- "<extra_id_78>": 32021,
84
- "<extra_id_79>": 32020,
85
- "<extra_id_7>": 32092,
86
- "<extra_id_80>": 32019,
87
- "<extra_id_81>": 32018,
88
- "<extra_id_82>": 32017,
89
- "<extra_id_83>": 32016,
90
- "<extra_id_84>": 32015,
91
- "<extra_id_85>": 32014,
92
- "<extra_id_86>": 32013,
93
- "<extra_id_87>": 32012,
94
- "<extra_id_88>": 32011,
95
- "<extra_id_89>": 32010,
96
- "<extra_id_8>": 32091,
97
- "<extra_id_90>": 32009,
98
- "<extra_id_91>": 32008,
99
- "<extra_id_92>": 32007,
100
- "<extra_id_93>": 32006,
101
- "<extra_id_94>": 32005,
102
- "<extra_id_95>": 32004,
103
- "<extra_id_96>": 32003,
104
- "<extra_id_97>": 32002,
105
- "<extra_id_98>": 32001,
106
- "<extra_id_99>": 32000,
107
- "<extra_id_9>": 32090,
108
  "<p>A": 32100,
109
  "<p>C": 32101,
110
  "<p>D": 32102,
@@ -125,14 +23,12 @@
125
  "<p>V": 32117,
126
  "<p>W": 32118,
127
  "<p>Y": 32119,
128
- "<pad>": 0,
129
- "<unk>": 2,
130
  "DESCRIPTION": 35068,
131
  "FUNCTION": 35070,
132
- "MOLECULE NAME": 35072,
133
- "PROTEIN FAMILIES": 35064,
134
  "PROTEIN NAME": 35069,
135
- "SUBCELLULAR LOCATION": 35065,
136
  "[#11C-1]": 34809,
137
  "[#11CH1]": 34772,
138
  "[#11C]": 32177,
 
1
  {
2
+ "<bom>": 35063,
3
+ "<bop>": 35065,
4
+ "<eom>": 35064,
5
+ "<eop>": 35066,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  "<p>A": 32100,
7
  "<p>C": 32101,
8
  "<p>D": 32102,
 
23
  "<p>V": 32117,
24
  "<p>W": 32118,
25
  "<p>Y": 32119,
 
 
26
  "DESCRIPTION": 35068,
27
  "FUNCTION": 35070,
28
+ "MOLECULE NAME": 35067,
29
+ "PROTEIN FAMILIES": 35072,
30
  "PROTEIN NAME": 35069,
31
+ "SUBCELLULAR LOCATION": 35071,
32
  "[#11C-1]": 34809,
33
  "[#11CH1]": 34772,
34
  "[#11C]": 32177,
special_tokens_map.json CHANGED
@@ -100,16 +100,16 @@
100
  "<extra_id_97>",
101
  "<extra_id_98>",
102
  "<extra_id_99>",
103
- "<eop>",
104
- "PROTEIN FAMILIES",
105
- "SUBCELLULAR LOCATION",
106
- "<bop>",
107
  "<eom>",
 
 
 
108
  "DESCRIPTION",
109
  "PROTEIN NAME",
110
  "FUNCTION",
111
- "<bom>",
112
- "MOLECULE NAME"
113
  ],
114
  "eos_token": "</s>",
115
  "pad_token": "<pad>",
 
100
  "<extra_id_97>",
101
  "<extra_id_98>",
102
  "<extra_id_99>",
103
+ "<bom>",
 
 
 
104
  "<eom>",
105
+ "<bop>",
106
+ "<eop>",
107
+ "MOLECULE NAME",
108
  "DESCRIPTION",
109
  "PROTEIN NAME",
110
  "FUNCTION",
111
+ "SUBCELLULAR LOCATION",
112
+ "PROTEIN FAMILIES"
113
  ],
114
  "eos_token": "</s>",
115
  "pad_token": "<pad>",
tokenizer.json CHANGED
@@ -7,27 +7,27 @@
7
  "id": 0,
8
  "content": "<pad>",
9
  "single_word": false,
10
- "lstrip": true,
11
- "rstrip": true,
12
- "normalized": true,
13
  "special": true
14
  },
15
  {
16
  "id": 1,
17
  "content": "</s>",
18
  "single_word": false,
19
- "lstrip": true,
20
- "rstrip": true,
21
- "normalized": true,
22
  "special": true
23
  },
24
  {
25
  "id": 2,
26
  "content": "<unk>",
27
  "single_word": false,
28
- "lstrip": true,
29
- "rstrip": true,
30
- "normalized": true,
31
  "special": true
32
  },
33
  {
@@ -43,8 +43,8 @@
43
  "id": 32000,
44
  "content": "<extra_id_99>",
45
  "single_word": false,
46
- "lstrip": true,
47
- "rstrip": true,
48
  "normalized": false,
49
  "special": true
50
  },
@@ -52,8 +52,8 @@
52
  "id": 32001,
53
  "content": "<extra_id_98>",
54
  "single_word": false,
55
- "lstrip": true,
56
- "rstrip": true,
57
  "normalized": false,
58
  "special": true
59
  },
@@ -61,8 +61,8 @@
61
  "id": 32002,
62
  "content": "<extra_id_97>",
63
  "single_word": false,
64
- "lstrip": true,
65
- "rstrip": true,
66
  "normalized": false,
67
  "special": true
68
  },
@@ -70,8 +70,8 @@
70
  "id": 32003,
71
  "content": "<extra_id_96>",
72
  "single_word": false,
73
- "lstrip": true,
74
- "rstrip": true,
75
  "normalized": false,
76
  "special": true
77
  },
@@ -79,8 +79,8 @@
79
  "id": 32004,
80
  "content": "<extra_id_95>",
81
  "single_word": false,
82
- "lstrip": true,
83
- "rstrip": true,
84
  "normalized": false,
85
  "special": true
86
  },
@@ -88,8 +88,8 @@
88
  "id": 32005,
89
  "content": "<extra_id_94>",
90
  "single_word": false,
91
- "lstrip": true,
92
- "rstrip": true,
93
  "normalized": false,
94
  "special": true
95
  },
@@ -97,8 +97,8 @@
97
  "id": 32006,
98
  "content": "<extra_id_93>",
99
  "single_word": false,
100
- "lstrip": true,
101
- "rstrip": true,
102
  "normalized": false,
103
  "special": true
104
  },
@@ -106,8 +106,8 @@
106
  "id": 32007,
107
  "content": "<extra_id_92>",
108
  "single_word": false,
109
- "lstrip": true,
110
- "rstrip": true,
111
  "normalized": false,
112
  "special": true
113
  },
@@ -115,8 +115,8 @@
115
  "id": 32008,
116
  "content": "<extra_id_91>",
117
  "single_word": false,
118
- "lstrip": true,
119
- "rstrip": true,
120
  "normalized": false,
121
  "special": true
122
  },
@@ -124,8 +124,8 @@
124
  "id": 32009,
125
  "content": "<extra_id_90>",
126
  "single_word": false,
127
- "lstrip": true,
128
- "rstrip": true,
129
  "normalized": false,
130
  "special": true
131
  },
@@ -133,8 +133,8 @@
133
  "id": 32010,
134
  "content": "<extra_id_89>",
135
  "single_word": false,
136
- "lstrip": true,
137
- "rstrip": true,
138
  "normalized": false,
139
  "special": true
140
  },
@@ -142,8 +142,8 @@
142
  "id": 32011,
143
  "content": "<extra_id_88>",
144
  "single_word": false,
145
- "lstrip": true,
146
- "rstrip": true,
147
  "normalized": false,
148
  "special": true
149
  },
@@ -151,8 +151,8 @@
151
  "id": 32012,
152
  "content": "<extra_id_87>",
153
  "single_word": false,
154
- "lstrip": true,
155
- "rstrip": true,
156
  "normalized": false,
157
  "special": true
158
  },
@@ -160,8 +160,8 @@
160
  "id": 32013,
161
  "content": "<extra_id_86>",
162
  "single_word": false,
163
- "lstrip": true,
164
- "rstrip": true,
165
  "normalized": false,
166
  "special": true
167
  },
@@ -169,8 +169,8 @@
169
  "id": 32014,
170
  "content": "<extra_id_85>",
171
  "single_word": false,
172
- "lstrip": true,
173
- "rstrip": true,
174
  "normalized": false,
175
  "special": true
176
  },
@@ -178,8 +178,8 @@
178
  "id": 32015,
179
  "content": "<extra_id_84>",
180
  "single_word": false,
181
- "lstrip": true,
182
- "rstrip": true,
183
  "normalized": false,
184
  "special": true
185
  },
@@ -187,8 +187,8 @@
187
  "id": 32016,
188
  "content": "<extra_id_83>",
189
  "single_word": false,
190
- "lstrip": true,
191
- "rstrip": true,
192
  "normalized": false,
193
  "special": true
194
  },
@@ -196,8 +196,8 @@
196
  "id": 32017,
197
  "content": "<extra_id_82>",
198
  "single_word": false,
199
- "lstrip": true,
200
- "rstrip": true,
201
  "normalized": false,
202
  "special": true
203
  },
@@ -205,8 +205,8 @@
205
  "id": 32018,
206
  "content": "<extra_id_81>",
207
  "single_word": false,
208
- "lstrip": true,
209
- "rstrip": true,
210
  "normalized": false,
211
  "special": true
212
  },
@@ -214,8 +214,8 @@
214
  "id": 32019,
215
  "content": "<extra_id_80>",
216
  "single_word": false,
217
- "lstrip": true,
218
- "rstrip": true,
219
  "normalized": false,
220
  "special": true
221
  },
@@ -223,8 +223,8 @@
223
  "id": 32020,
224
  "content": "<extra_id_79>",
225
  "single_word": false,
226
- "lstrip": true,
227
- "rstrip": true,
228
  "normalized": false,
229
  "special": true
230
  },
@@ -232,8 +232,8 @@
232
  "id": 32021,
233
  "content": "<extra_id_78>",
234
  "single_word": false,
235
- "lstrip": true,
236
- "rstrip": true,
237
  "normalized": false,
238
  "special": true
239
  },
@@ -241,8 +241,8 @@
241
  "id": 32022,
242
  "content": "<extra_id_77>",
243
  "single_word": false,
244
- "lstrip": true,
245
- "rstrip": true,
246
  "normalized": false,
247
  "special": true
248
  },
@@ -250,8 +250,8 @@
250
  "id": 32023,
251
  "content": "<extra_id_76>",
252
  "single_word": false,
253
- "lstrip": true,
254
- "rstrip": true,
255
  "normalized": false,
256
  "special": true
257
  },
@@ -259,8 +259,8 @@
259
  "id": 32024,
260
  "content": "<extra_id_75>",
261
  "single_word": false,
262
- "lstrip": true,
263
- "rstrip": true,
264
  "normalized": false,
265
  "special": true
266
  },
@@ -268,8 +268,8 @@
268
  "id": 32025,
269
  "content": "<extra_id_74>",
270
  "single_word": false,
271
- "lstrip": true,
272
- "rstrip": true,
273
  "normalized": false,
274
  "special": true
275
  },
@@ -277,8 +277,8 @@
277
  "id": 32026,
278
  "content": "<extra_id_73>",
279
  "single_word": false,
280
- "lstrip": true,
281
- "rstrip": true,
282
  "normalized": false,
283
  "special": true
284
  },
@@ -286,8 +286,8 @@
286
  "id": 32027,
287
  "content": "<extra_id_72>",
288
  "single_word": false,
289
- "lstrip": true,
290
- "rstrip": true,
291
  "normalized": false,
292
  "special": true
293
  },
@@ -295,8 +295,8 @@
295
  "id": 32028,
296
  "content": "<extra_id_71>",
297
  "single_word": false,
298
- "lstrip": true,
299
- "rstrip": true,
300
  "normalized": false,
301
  "special": true
302
  },
@@ -304,8 +304,8 @@
304
  "id": 32029,
305
  "content": "<extra_id_70>",
306
  "single_word": false,
307
- "lstrip": true,
308
- "rstrip": true,
309
  "normalized": false,
310
  "special": true
311
  },
@@ -313,8 +313,8 @@
313
  "id": 32030,
314
  "content": "<extra_id_69>",
315
  "single_word": false,
316
- "lstrip": true,
317
- "rstrip": true,
318
  "normalized": false,
319
  "special": true
320
  },
@@ -322,8 +322,8 @@
322
  "id": 32031,
323
  "content": "<extra_id_68>",
324
  "single_word": false,
325
- "lstrip": true,
326
- "rstrip": true,
327
  "normalized": false,
328
  "special": true
329
  },
@@ -331,8 +331,8 @@
331
  "id": 32032,
332
  "content": "<extra_id_67>",
333
  "single_word": false,
334
- "lstrip": true,
335
- "rstrip": true,
336
  "normalized": false,
337
  "special": true
338
  },
@@ -340,8 +340,8 @@
340
  "id": 32033,
341
  "content": "<extra_id_66>",
342
  "single_word": false,
343
- "lstrip": true,
344
- "rstrip": true,
345
  "normalized": false,
346
  "special": true
347
  },
@@ -349,8 +349,8 @@
349
  "id": 32034,
350
  "content": "<extra_id_65>",
351
  "single_word": false,
352
- "lstrip": true,
353
- "rstrip": true,
354
  "normalized": false,
355
  "special": true
356
  },
@@ -358,8 +358,8 @@
358
  "id": 32035,
359
  "content": "<extra_id_64>",
360
  "single_word": false,
361
- "lstrip": true,
362
- "rstrip": true,
363
  "normalized": false,
364
  "special": true
365
  },
@@ -367,8 +367,8 @@
367
  "id": 32036,
368
  "content": "<extra_id_63>",
369
  "single_word": false,
370
- "lstrip": true,
371
- "rstrip": true,
372
  "normalized": false,
373
  "special": true
374
  },
@@ -376,8 +376,8 @@
376
  "id": 32037,
377
  "content": "<extra_id_62>",
378
  "single_word": false,
379
- "lstrip": true,
380
- "rstrip": true,
381
  "normalized": false,
382
  "special": true
383
  },
@@ -385,8 +385,8 @@
385
  "id": 32038,
386
  "content": "<extra_id_61>",
387
  "single_word": false,
388
- "lstrip": true,
389
- "rstrip": true,
390
  "normalized": false,
391
  "special": true
392
  },
@@ -394,8 +394,8 @@
394
  "id": 32039,
395
  "content": "<extra_id_60>",
396
  "single_word": false,
397
- "lstrip": true,
398
- "rstrip": true,
399
  "normalized": false,
400
  "special": true
401
  },
@@ -403,8 +403,8 @@
403
  "id": 32040,
404
  "content": "<extra_id_59>",
405
  "single_word": false,
406
- "lstrip": true,
407
- "rstrip": true,
408
  "normalized": false,
409
  "special": true
410
  },
@@ -412,8 +412,8 @@
412
  "id": 32041,
413
  "content": "<extra_id_58>",
414
  "single_word": false,
415
- "lstrip": true,
416
- "rstrip": true,
417
  "normalized": false,
418
  "special": true
419
  },
@@ -421,8 +421,8 @@
421
  "id": 32042,
422
  "content": "<extra_id_57>",
423
  "single_word": false,
424
- "lstrip": true,
425
- "rstrip": true,
426
  "normalized": false,
427
  "special": true
428
  },
@@ -430,8 +430,8 @@
430
  "id": 32043,
431
  "content": "<extra_id_56>",
432
  "single_word": false,
433
- "lstrip": true,
434
- "rstrip": true,
435
  "normalized": false,
436
  "special": true
437
  },
@@ -439,8 +439,8 @@
439
  "id": 32044,
440
  "content": "<extra_id_55>",
441
  "single_word": false,
442
- "lstrip": true,
443
- "rstrip": true,
444
  "normalized": false,
445
  "special": true
446
  },
@@ -448,8 +448,8 @@
448
  "id": 32045,
449
  "content": "<extra_id_54>",
450
  "single_word": false,
451
- "lstrip": true,
452
- "rstrip": true,
453
  "normalized": false,
454
  "special": true
455
  },
@@ -457,8 +457,8 @@
457
  "id": 32046,
458
  "content": "<extra_id_53>",
459
  "single_word": false,
460
- "lstrip": true,
461
- "rstrip": true,
462
  "normalized": false,
463
  "special": true
464
  },
@@ -466,8 +466,8 @@
466
  "id": 32047,
467
  "content": "<extra_id_52>",
468
  "single_word": false,
469
- "lstrip": true,
470
- "rstrip": true,
471
  "normalized": false,
472
  "special": true
473
  },
@@ -475,8 +475,8 @@
475
  "id": 32048,
476
  "content": "<extra_id_51>",
477
  "single_word": false,
478
- "lstrip": true,
479
- "rstrip": true,
480
  "normalized": false,
481
  "special": true
482
  },
@@ -484,8 +484,8 @@
484
  "id": 32049,
485
  "content": "<extra_id_50>",
486
  "single_word": false,
487
- "lstrip": true,
488
- "rstrip": true,
489
  "normalized": false,
490
  "special": true
491
  },
@@ -493,8 +493,8 @@
493
  "id": 32050,
494
  "content": "<extra_id_49>",
495
  "single_word": false,
496
- "lstrip": true,
497
- "rstrip": true,
498
  "normalized": false,
499
  "special": true
500
  },
@@ -502,8 +502,8 @@
502
  "id": 32051,
503
  "content": "<extra_id_48>",
504
  "single_word": false,
505
- "lstrip": true,
506
- "rstrip": true,
507
  "normalized": false,
508
  "special": true
509
  },
@@ -511,8 +511,8 @@
511
  "id": 32052,
512
  "content": "<extra_id_47>",
513
  "single_word": false,
514
- "lstrip": true,
515
- "rstrip": true,
516
  "normalized": false,
517
  "special": true
518
  },
@@ -520,8 +520,8 @@
520
  "id": 32053,
521
  "content": "<extra_id_46>",
522
  "single_word": false,
523
- "lstrip": true,
524
- "rstrip": true,
525
  "normalized": false,
526
  "special": true
527
  },
@@ -529,8 +529,8 @@
529
  "id": 32054,
530
  "content": "<extra_id_45>",
531
  "single_word": false,
532
- "lstrip": true,
533
- "rstrip": true,
534
  "normalized": false,
535
  "special": true
536
  },
@@ -538,8 +538,8 @@
538
  "id": 32055,
539
  "content": "<extra_id_44>",
540
  "single_word": false,
541
- "lstrip": true,
542
- "rstrip": true,
543
  "normalized": false,
544
  "special": true
545
  },
@@ -547,8 +547,8 @@
547
  "id": 32056,
548
  "content": "<extra_id_43>",
549
  "single_word": false,
550
- "lstrip": true,
551
- "rstrip": true,
552
  "normalized": false,
553
  "special": true
554
  },
@@ -556,8 +556,8 @@
556
  "id": 32057,
557
  "content": "<extra_id_42>",
558
  "single_word": false,
559
- "lstrip": true,
560
- "rstrip": true,
561
  "normalized": false,
562
  "special": true
563
  },
@@ -565,8 +565,8 @@
565
  "id": 32058,
566
  "content": "<extra_id_41>",
567
  "single_word": false,
568
- "lstrip": true,
569
- "rstrip": true,
570
  "normalized": false,
571
  "special": true
572
  },
@@ -574,8 +574,8 @@
574
  "id": 32059,
575
  "content": "<extra_id_40>",
576
  "single_word": false,
577
- "lstrip": true,
578
- "rstrip": true,
579
  "normalized": false,
580
  "special": true
581
  },
@@ -583,8 +583,8 @@
583
  "id": 32060,
584
  "content": "<extra_id_39>",
585
  "single_word": false,
586
- "lstrip": true,
587
- "rstrip": true,
588
  "normalized": false,
589
  "special": true
590
  },
@@ -592,8 +592,8 @@
592
  "id": 32061,
593
  "content": "<extra_id_38>",
594
  "single_word": false,
595
- "lstrip": true,
596
- "rstrip": true,
597
  "normalized": false,
598
  "special": true
599
  },
@@ -601,8 +601,8 @@
601
  "id": 32062,
602
  "content": "<extra_id_37>",
603
  "single_word": false,
604
- "lstrip": true,
605
- "rstrip": true,
606
  "normalized": false,
607
  "special": true
608
  },
@@ -610,8 +610,8 @@
610
  "id": 32063,
611
  "content": "<extra_id_36>",
612
  "single_word": false,
613
- "lstrip": true,
614
- "rstrip": true,
615
  "normalized": false,
616
  "special": true
617
  },
@@ -619,8 +619,8 @@
619
  "id": 32064,
620
  "content": "<extra_id_35>",
621
  "single_word": false,
622
- "lstrip": true,
623
- "rstrip": true,
624
  "normalized": false,
625
  "special": true
626
  },
@@ -628,8 +628,8 @@
628
  "id": 32065,
629
  "content": "<extra_id_34>",
630
  "single_word": false,
631
- "lstrip": true,
632
- "rstrip": true,
633
  "normalized": false,
634
  "special": true
635
  },
@@ -637,8 +637,8 @@
637
  "id": 32066,
638
  "content": "<extra_id_33>",
639
  "single_word": false,
640
- "lstrip": true,
641
- "rstrip": true,
642
  "normalized": false,
643
  "special": true
644
  },
@@ -646,8 +646,8 @@
646
  "id": 32067,
647
  "content": "<extra_id_32>",
648
  "single_word": false,
649
- "lstrip": true,
650
- "rstrip": true,
651
  "normalized": false,
652
  "special": true
653
  },
@@ -655,8 +655,8 @@
655
  "id": 32068,
656
  "content": "<extra_id_31>",
657
  "single_word": false,
658
- "lstrip": true,
659
- "rstrip": true,
660
  "normalized": false,
661
  "special": true
662
  },
@@ -664,8 +664,8 @@
664
  "id": 32069,
665
  "content": "<extra_id_30>",
666
  "single_word": false,
667
- "lstrip": true,
668
- "rstrip": true,
669
  "normalized": false,
670
  "special": true
671
  },
@@ -673,8 +673,8 @@
673
  "id": 32070,
674
  "content": "<extra_id_29>",
675
  "single_word": false,
676
- "lstrip": true,
677
- "rstrip": true,
678
  "normalized": false,
679
  "special": true
680
  },
@@ -682,8 +682,8 @@
682
  "id": 32071,
683
  "content": "<extra_id_28>",
684
  "single_word": false,
685
- "lstrip": true,
686
- "rstrip": true,
687
  "normalized": false,
688
  "special": true
689
  },
@@ -691,8 +691,8 @@
691
  "id": 32072,
692
  "content": "<extra_id_27>",
693
  "single_word": false,
694
- "lstrip": true,
695
- "rstrip": true,
696
  "normalized": false,
697
  "special": true
698
  },
@@ -700,8 +700,8 @@
700
  "id": 32073,
701
  "content": "<extra_id_26>",
702
  "single_word": false,
703
- "lstrip": true,
704
- "rstrip": true,
705
  "normalized": false,
706
  "special": true
707
  },
@@ -709,8 +709,8 @@
709
  "id": 32074,
710
  "content": "<extra_id_25>",
711
  "single_word": false,
712
- "lstrip": true,
713
- "rstrip": true,
714
  "normalized": false,
715
  "special": true
716
  },
@@ -718,8 +718,8 @@
718
  "id": 32075,
719
  "content": "<extra_id_24>",
720
  "single_word": false,
721
- "lstrip": true,
722
- "rstrip": true,
723
  "normalized": false,
724
  "special": true
725
  },
@@ -727,8 +727,8 @@
727
  "id": 32076,
728
  "content": "<extra_id_23>",
729
  "single_word": false,
730
- "lstrip": true,
731
- "rstrip": true,
732
  "normalized": false,
733
  "special": true
734
  },
@@ -736,8 +736,8 @@
736
  "id": 32077,
737
  "content": "<extra_id_22>",
738
  "single_word": false,
739
- "lstrip": true,
740
- "rstrip": true,
741
  "normalized": false,
742
  "special": true
743
  },
@@ -745,8 +745,8 @@
745
  "id": 32078,
746
  "content": "<extra_id_21>",
747
  "single_word": false,
748
- "lstrip": true,
749
- "rstrip": true,
750
  "normalized": false,
751
  "special": true
752
  },
@@ -754,8 +754,8 @@
754
  "id": 32079,
755
  "content": "<extra_id_20>",
756
  "single_word": false,
757
- "lstrip": true,
758
- "rstrip": true,
759
  "normalized": false,
760
  "special": true
761
  },
@@ -763,8 +763,8 @@
763
  "id": 32080,
764
  "content": "<extra_id_19>",
765
  "single_word": false,
766
- "lstrip": true,
767
- "rstrip": true,
768
  "normalized": false,
769
  "special": true
770
  },
@@ -772,8 +772,8 @@
772
  "id": 32081,
773
  "content": "<extra_id_18>",
774
  "single_word": false,
775
- "lstrip": true,
776
- "rstrip": true,
777
  "normalized": false,
778
  "special": true
779
  },
@@ -781,8 +781,8 @@
781
  "id": 32082,
782
  "content": "<extra_id_17>",
783
  "single_word": false,
784
- "lstrip": true,
785
- "rstrip": true,
786
  "normalized": false,
787
  "special": true
788
  },
@@ -790,8 +790,8 @@
790
  "id": 32083,
791
  "content": "<extra_id_16>",
792
  "single_word": false,
793
- "lstrip": true,
794
- "rstrip": true,
795
  "normalized": false,
796
  "special": true
797
  },
@@ -799,8 +799,8 @@
799
  "id": 32084,
800
  "content": "<extra_id_15>",
801
  "single_word": false,
802
- "lstrip": true,
803
- "rstrip": true,
804
  "normalized": false,
805
  "special": true
806
  },
@@ -808,8 +808,8 @@
808
  "id": 32085,
809
  "content": "<extra_id_14>",
810
  "single_word": false,
811
- "lstrip": true,
812
- "rstrip": true,
813
  "normalized": false,
814
  "special": true
815
  },
@@ -817,8 +817,8 @@
817
  "id": 32086,
818
  "content": "<extra_id_13>",
819
  "single_word": false,
820
- "lstrip": true,
821
- "rstrip": true,
822
  "normalized": false,
823
  "special": true
824
  },
@@ -826,8 +826,8 @@
826
  "id": 32087,
827
  "content": "<extra_id_12>",
828
  "single_word": false,
829
- "lstrip": true,
830
- "rstrip": true,
831
  "normalized": false,
832
  "special": true
833
  },
@@ -835,8 +835,8 @@
835
  "id": 32088,
836
  "content": "<extra_id_11>",
837
  "single_word": false,
838
- "lstrip": true,
839
- "rstrip": true,
840
  "normalized": false,
841
  "special": true
842
  },
@@ -844,8 +844,8 @@
844
  "id": 32089,
845
  "content": "<extra_id_10>",
846
  "single_word": false,
847
- "lstrip": true,
848
- "rstrip": true,
849
  "normalized": false,
850
  "special": true
851
  },
@@ -853,8 +853,8 @@
853
  "id": 32090,
854
  "content": "<extra_id_9>",
855
  "single_word": false,
856
- "lstrip": true,
857
- "rstrip": true,
858
  "normalized": false,
859
  "special": true
860
  },
@@ -862,8 +862,8 @@
862
  "id": 32091,
863
  "content": "<extra_id_8>",
864
  "single_word": false,
865
- "lstrip": true,
866
- "rstrip": true,
867
  "normalized": false,
868
  "special": true
869
  },
@@ -871,8 +871,8 @@
871
  "id": 32092,
872
  "content": "<extra_id_7>",
873
  "single_word": false,
874
- "lstrip": true,
875
- "rstrip": true,
876
  "normalized": false,
877
  "special": true
878
  },
@@ -880,8 +880,8 @@
880
  "id": 32093,
881
  "content": "<extra_id_6>",
882
  "single_word": false,
883
- "lstrip": true,
884
- "rstrip": true,
885
  "normalized": false,
886
  "special": true
887
  },
@@ -889,8 +889,8 @@
889
  "id": 32094,
890
  "content": "<extra_id_5>",
891
  "single_word": false,
892
- "lstrip": true,
893
- "rstrip": true,
894
  "normalized": false,
895
  "special": true
896
  },
@@ -898,8 +898,8 @@
898
  "id": 32095,
899
  "content": "<extra_id_4>",
900
  "single_word": false,
901
- "lstrip": true,
902
- "rstrip": true,
903
  "normalized": false,
904
  "special": true
905
  },
@@ -907,8 +907,8 @@
907
  "id": 32096,
908
  "content": "<extra_id_3>",
909
  "single_word": false,
910
- "lstrip": true,
911
- "rstrip": true,
912
  "normalized": false,
913
  "special": true
914
  },
@@ -916,8 +916,8 @@
916
  "id": 32097,
917
  "content": "<extra_id_2>",
918
  "single_word": false,
919
- "lstrip": true,
920
- "rstrip": true,
921
  "normalized": false,
922
  "special": true
923
  },
@@ -925,8 +925,8 @@
925
  "id": 32098,
926
  "content": "<extra_id_1>",
927
  "single_word": false,
928
- "lstrip": true,
929
- "rstrip": true,
930
  "normalized": false,
931
  "special": true
932
  },
@@ -934,8 +934,8 @@
934
  "id": 32099,
935
  "content": "<extra_id_0>",
936
  "single_word": false,
937
- "lstrip": true,
938
- "rstrip": true,
939
  "normalized": false,
940
  "special": true
941
  },
@@ -27608,46 +27608,46 @@
27608
  },
27609
  {
27610
  "id": 35063,
27611
- "content": "<eop>",
27612
  "single_word": false,
27613
- "lstrip": true,
27614
- "rstrip": true,
27615
  "normalized": false,
27616
  "special": true
27617
  },
27618
  {
27619
  "id": 35064,
27620
- "content": "PROTEIN FAMILIES",
27621
  "single_word": false,
27622
- "lstrip": true,
27623
- "rstrip": true,
27624
  "normalized": false,
27625
  "special": true
27626
  },
27627
  {
27628
  "id": 35065,
27629
- "content": "SUBCELLULAR LOCATION",
27630
  "single_word": false,
27631
- "lstrip": true,
27632
- "rstrip": true,
27633
  "normalized": false,
27634
  "special": true
27635
  },
27636
  {
27637
  "id": 35066,
27638
- "content": "<bop>",
27639
  "single_word": false,
27640
- "lstrip": true,
27641
- "rstrip": true,
27642
  "normalized": false,
27643
  "special": true
27644
  },
27645
  {
27646
  "id": 35067,
27647
- "content": "<eom>",
27648
  "single_word": false,
27649
- "lstrip": true,
27650
- "rstrip": true,
27651
  "normalized": false,
27652
  "special": true
27653
  },
@@ -27655,8 +27655,8 @@
27655
  "id": 35068,
27656
  "content": "DESCRIPTION",
27657
  "single_word": false,
27658
- "lstrip": true,
27659
- "rstrip": true,
27660
  "normalized": false,
27661
  "special": true
27662
  },
@@ -27664,8 +27664,8 @@
27664
  "id": 35069,
27665
  "content": "PROTEIN NAME",
27666
  "single_word": false,
27667
- "lstrip": true,
27668
- "rstrip": true,
27669
  "normalized": false,
27670
  "special": true
27671
  },
@@ -27673,26 +27673,26 @@
27673
  "id": 35070,
27674
  "content": "FUNCTION",
27675
  "single_word": false,
27676
- "lstrip": true,
27677
- "rstrip": true,
27678
  "normalized": false,
27679
  "special": true
27680
  },
27681
  {
27682
  "id": 35071,
27683
- "content": "<bom>",
27684
  "single_word": false,
27685
- "lstrip": true,
27686
- "rstrip": true,
27687
  "normalized": false,
27688
  "special": true
27689
  },
27690
  {
27691
  "id": 35072,
27692
- "content": "MOLECULE NAME",
27693
  "single_word": false,
27694
- "lstrip": true,
27695
- "rstrip": true,
27696
  "normalized": false,
27697
  "special": true
27698
  }
@@ -156181,7 +156181,6 @@
156181
  "<extra_id_0>",
156182
  0.0
156183
  ]
156184
- ],
156185
- "byte_fallback": false
156186
  }
156187
  }
 
7
  "id": 0,
8
  "content": "<pad>",
9
  "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
  "special": true
14
  },
15
  {
16
  "id": 1,
17
  "content": "</s>",
18
  "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
  "special": true
23
  },
24
  {
25
  "id": 2,
26
  "content": "<unk>",
27
  "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
  "special": true
32
  },
33
  {
 
43
  "id": 32000,
44
  "content": "<extra_id_99>",
45
  "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
  },
 
52
  "id": 32001,
53
  "content": "<extra_id_98>",
54
  "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
  "normalized": false,
58
  "special": true
59
  },
 
61
  "id": 32002,
62
  "content": "<extra_id_97>",
63
  "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
  "normalized": false,
67
  "special": true
68
  },
 
70
  "id": 32003,
71
  "content": "<extra_id_96>",
72
  "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
  "normalized": false,
76
  "special": true
77
  },
 
79
  "id": 32004,
80
  "content": "<extra_id_95>",
81
  "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
  "normalized": false,
85
  "special": true
86
  },
 
88
  "id": 32005,
89
  "content": "<extra_id_94>",
90
  "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
  "normalized": false,
94
  "special": true
95
  },
 
97
  "id": 32006,
98
  "content": "<extra_id_93>",
99
  "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
  "normalized": false,
103
  "special": true
104
  },
 
106
  "id": 32007,
107
  "content": "<extra_id_92>",
108
  "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
  "normalized": false,
112
  "special": true
113
  },
 
115
  "id": 32008,
116
  "content": "<extra_id_91>",
117
  "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
  "normalized": false,
121
  "special": true
122
  },
 
124
  "id": 32009,
125
  "content": "<extra_id_90>",
126
  "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
  "normalized": false,
130
  "special": true
131
  },
 
133
  "id": 32010,
134
  "content": "<extra_id_89>",
135
  "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
  "normalized": false,
139
  "special": true
140
  },
 
142
  "id": 32011,
143
  "content": "<extra_id_88>",
144
  "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
  "normalized": false,
148
  "special": true
149
  },
 
151
  "id": 32012,
152
  "content": "<extra_id_87>",
153
  "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
  "normalized": false,
157
  "special": true
158
  },
 
160
  "id": 32013,
161
  "content": "<extra_id_86>",
162
  "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
  "normalized": false,
166
  "special": true
167
  },
 
169
  "id": 32014,
170
  "content": "<extra_id_85>",
171
  "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
  "normalized": false,
175
  "special": true
176
  },
 
178
  "id": 32015,
179
  "content": "<extra_id_84>",
180
  "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
  "normalized": false,
184
  "special": true
185
  },
 
187
  "id": 32016,
188
  "content": "<extra_id_83>",
189
  "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
  "normalized": false,
193
  "special": true
194
  },
 
196
  "id": 32017,
197
  "content": "<extra_id_82>",
198
  "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
  "normalized": false,
202
  "special": true
203
  },
 
205
  "id": 32018,
206
  "content": "<extra_id_81>",
207
  "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
  "normalized": false,
211
  "special": true
212
  },
 
214
  "id": 32019,
215
  "content": "<extra_id_80>",
216
  "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
  "normalized": false,
220
  "special": true
221
  },
 
223
  "id": 32020,
224
  "content": "<extra_id_79>",
225
  "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
  "normalized": false,
229
  "special": true
230
  },
 
232
  "id": 32021,
233
  "content": "<extra_id_78>",
234
  "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
  "normalized": false,
238
  "special": true
239
  },
 
241
  "id": 32022,
242
  "content": "<extra_id_77>",
243
  "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
  "normalized": false,
247
  "special": true
248
  },
 
250
  "id": 32023,
251
  "content": "<extra_id_76>",
252
  "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
  "normalized": false,
256
  "special": true
257
  },
 
259
  "id": 32024,
260
  "content": "<extra_id_75>",
261
  "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
  "normalized": false,
265
  "special": true
266
  },
 
268
  "id": 32025,
269
  "content": "<extra_id_74>",
270
  "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
  "normalized": false,
274
  "special": true
275
  },
 
277
  "id": 32026,
278
  "content": "<extra_id_73>",
279
  "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
  "normalized": false,
283
  "special": true
284
  },
 
286
  "id": 32027,
287
  "content": "<extra_id_72>",
288
  "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
  "normalized": false,
292
  "special": true
293
  },
 
295
  "id": 32028,
296
  "content": "<extra_id_71>",
297
  "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
  "normalized": false,
301
  "special": true
302
  },
 
304
  "id": 32029,
305
  "content": "<extra_id_70>",
306
  "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
  "normalized": false,
310
  "special": true
311
  },
 
313
  "id": 32030,
314
  "content": "<extra_id_69>",
315
  "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
  "normalized": false,
319
  "special": true
320
  },
 
322
  "id": 32031,
323
  "content": "<extra_id_68>",
324
  "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
  "normalized": false,
328
  "special": true
329
  },
 
331
  "id": 32032,
332
  "content": "<extra_id_67>",
333
  "single_word": false,
334
+ "lstrip": false,
335
+ "rstrip": false,
336
  "normalized": false,
337
  "special": true
338
  },
 
340
  "id": 32033,
341
  "content": "<extra_id_66>",
342
  "single_word": false,
343
+ "lstrip": false,
344
+ "rstrip": false,
345
  "normalized": false,
346
  "special": true
347
  },
 
349
  "id": 32034,
350
  "content": "<extra_id_65>",
351
  "single_word": false,
352
+ "lstrip": false,
353
+ "rstrip": false,
354
  "normalized": false,
355
  "special": true
356
  },
 
358
  "id": 32035,
359
  "content": "<extra_id_64>",
360
  "single_word": false,
361
+ "lstrip": false,
362
+ "rstrip": false,
363
  "normalized": false,
364
  "special": true
365
  },
 
367
  "id": 32036,
368
  "content": "<extra_id_63>",
369
  "single_word": false,
370
+ "lstrip": false,
371
+ "rstrip": false,
372
  "normalized": false,
373
  "special": true
374
  },
 
376
  "id": 32037,
377
  "content": "<extra_id_62>",
378
  "single_word": false,
379
+ "lstrip": false,
380
+ "rstrip": false,
381
  "normalized": false,
382
  "special": true
383
  },
 
385
  "id": 32038,
386
  "content": "<extra_id_61>",
387
  "single_word": false,
388
+ "lstrip": false,
389
+ "rstrip": false,
390
  "normalized": false,
391
  "special": true
392
  },
 
394
  "id": 32039,
395
  "content": "<extra_id_60>",
396
  "single_word": false,
397
+ "lstrip": false,
398
+ "rstrip": false,
399
  "normalized": false,
400
  "special": true
401
  },
 
403
  "id": 32040,
404
  "content": "<extra_id_59>",
405
  "single_word": false,
406
+ "lstrip": false,
407
+ "rstrip": false,
408
  "normalized": false,
409
  "special": true
410
  },
 
412
  "id": 32041,
413
  "content": "<extra_id_58>",
414
  "single_word": false,
415
+ "lstrip": false,
416
+ "rstrip": false,
417
  "normalized": false,
418
  "special": true
419
  },
 
421
  "id": 32042,
422
  "content": "<extra_id_57>",
423
  "single_word": false,
424
+ "lstrip": false,
425
+ "rstrip": false,
426
  "normalized": false,
427
  "special": true
428
  },
 
430
  "id": 32043,
431
  "content": "<extra_id_56>",
432
  "single_word": false,
433
+ "lstrip": false,
434
+ "rstrip": false,
435
  "normalized": false,
436
  "special": true
437
  },
 
439
  "id": 32044,
440
  "content": "<extra_id_55>",
441
  "single_word": false,
442
+ "lstrip": false,
443
+ "rstrip": false,
444
  "normalized": false,
445
  "special": true
446
  },
 
448
  "id": 32045,
449
  "content": "<extra_id_54>",
450
  "single_word": false,
451
+ "lstrip": false,
452
+ "rstrip": false,
453
  "normalized": false,
454
  "special": true
455
  },
 
457
  "id": 32046,
458
  "content": "<extra_id_53>",
459
  "single_word": false,
460
+ "lstrip": false,
461
+ "rstrip": false,
462
  "normalized": false,
463
  "special": true
464
  },
 
466
  "id": 32047,
467
  "content": "<extra_id_52>",
468
  "single_word": false,
469
+ "lstrip": false,
470
+ "rstrip": false,
471
  "normalized": false,
472
  "special": true
473
  },
 
475
  "id": 32048,
476
  "content": "<extra_id_51>",
477
  "single_word": false,
478
+ "lstrip": false,
479
+ "rstrip": false,
480
  "normalized": false,
481
  "special": true
482
  },
 
484
  "id": 32049,
485
  "content": "<extra_id_50>",
486
  "single_word": false,
487
+ "lstrip": false,
488
+ "rstrip": false,
489
  "normalized": false,
490
  "special": true
491
  },
 
493
  "id": 32050,
494
  "content": "<extra_id_49>",
495
  "single_word": false,
496
+ "lstrip": false,
497
+ "rstrip": false,
498
  "normalized": false,
499
  "special": true
500
  },
 
502
  "id": 32051,
503
  "content": "<extra_id_48>",
504
  "single_word": false,
505
+ "lstrip": false,
506
+ "rstrip": false,
507
  "normalized": false,
508
  "special": true
509
  },
 
511
  "id": 32052,
512
  "content": "<extra_id_47>",
513
  "single_word": false,
514
+ "lstrip": false,
515
+ "rstrip": false,
516
  "normalized": false,
517
  "special": true
518
  },
 
520
  "id": 32053,
521
  "content": "<extra_id_46>",
522
  "single_word": false,
523
+ "lstrip": false,
524
+ "rstrip": false,
525
  "normalized": false,
526
  "special": true
527
  },
 
529
  "id": 32054,
530
  "content": "<extra_id_45>",
531
  "single_word": false,
532
+ "lstrip": false,
533
+ "rstrip": false,
534
  "normalized": false,
535
  "special": true
536
  },
 
538
  "id": 32055,
539
  "content": "<extra_id_44>",
540
  "single_word": false,
541
+ "lstrip": false,
542
+ "rstrip": false,
543
  "normalized": false,
544
  "special": true
545
  },
 
547
  "id": 32056,
548
  "content": "<extra_id_43>",
549
  "single_word": false,
550
+ "lstrip": false,
551
+ "rstrip": false,
552
  "normalized": false,
553
  "special": true
554
  },
 
556
  "id": 32057,
557
  "content": "<extra_id_42>",
558
  "single_word": false,
559
+ "lstrip": false,
560
+ "rstrip": false,
561
  "normalized": false,
562
  "special": true
563
  },
 
565
  "id": 32058,
566
  "content": "<extra_id_41>",
567
  "single_word": false,
568
+ "lstrip": false,
569
+ "rstrip": false,
570
  "normalized": false,
571
  "special": true
572
  },
 
574
  "id": 32059,
575
  "content": "<extra_id_40>",
576
  "single_word": false,
577
+ "lstrip": false,
578
+ "rstrip": false,
579
  "normalized": false,
580
  "special": true
581
  },
 
583
  "id": 32060,
584
  "content": "<extra_id_39>",
585
  "single_word": false,
586
+ "lstrip": false,
587
+ "rstrip": false,
588
  "normalized": false,
589
  "special": true
590
  },
 
592
  "id": 32061,
593
  "content": "<extra_id_38>",
594
  "single_word": false,
595
+ "lstrip": false,
596
+ "rstrip": false,
597
  "normalized": false,
598
  "special": true
599
  },
 
601
  "id": 32062,
602
  "content": "<extra_id_37>",
603
  "single_word": false,
604
+ "lstrip": false,
605
+ "rstrip": false,
606
  "normalized": false,
607
  "special": true
608
  },
 
610
  "id": 32063,
611
  "content": "<extra_id_36>",
612
  "single_word": false,
613
+ "lstrip": false,
614
+ "rstrip": false,
615
  "normalized": false,
616
  "special": true
617
  },
 
619
  "id": 32064,
620
  "content": "<extra_id_35>",
621
  "single_word": false,
622
+ "lstrip": false,
623
+ "rstrip": false,
624
  "normalized": false,
625
  "special": true
626
  },
 
628
  "id": 32065,
629
  "content": "<extra_id_34>",
630
  "single_word": false,
631
+ "lstrip": false,
632
+ "rstrip": false,
633
  "normalized": false,
634
  "special": true
635
  },
 
637
  "id": 32066,
638
  "content": "<extra_id_33>",
639
  "single_word": false,
640
+ "lstrip": false,
641
+ "rstrip": false,
642
  "normalized": false,
643
  "special": true
644
  },
 
646
  "id": 32067,
647
  "content": "<extra_id_32>",
648
  "single_word": false,
649
+ "lstrip": false,
650
+ "rstrip": false,
651
  "normalized": false,
652
  "special": true
653
  },
 
655
  "id": 32068,
656
  "content": "<extra_id_31>",
657
  "single_word": false,
658
+ "lstrip": false,
659
+ "rstrip": false,
660
  "normalized": false,
661
  "special": true
662
  },
 
664
  "id": 32069,
665
  "content": "<extra_id_30>",
666
  "single_word": false,
667
+ "lstrip": false,
668
+ "rstrip": false,
669
  "normalized": false,
670
  "special": true
671
  },
 
673
  "id": 32070,
674
  "content": "<extra_id_29>",
675
  "single_word": false,
676
+ "lstrip": false,
677
+ "rstrip": false,
678
  "normalized": false,
679
  "special": true
680
  },
 
682
  "id": 32071,
683
  "content": "<extra_id_28>",
684
  "single_word": false,
685
+ "lstrip": false,
686
+ "rstrip": false,
687
  "normalized": false,
688
  "special": true
689
  },
 
691
  "id": 32072,
692
  "content": "<extra_id_27>",
693
  "single_word": false,
694
+ "lstrip": false,
695
+ "rstrip": false,
696
  "normalized": false,
697
  "special": true
698
  },
 
700
  "id": 32073,
701
  "content": "<extra_id_26>",
702
  "single_word": false,
703
+ "lstrip": false,
704
+ "rstrip": false,
705
  "normalized": false,
706
  "special": true
707
  },
 
709
  "id": 32074,
710
  "content": "<extra_id_25>",
711
  "single_word": false,
712
+ "lstrip": false,
713
+ "rstrip": false,
714
  "normalized": false,
715
  "special": true
716
  },
 
718
  "id": 32075,
719
  "content": "<extra_id_24>",
720
  "single_word": false,
721
+ "lstrip": false,
722
+ "rstrip": false,
723
  "normalized": false,
724
  "special": true
725
  },
 
727
  "id": 32076,
728
  "content": "<extra_id_23>",
729
  "single_word": false,
730
+ "lstrip": false,
731
+ "rstrip": false,
732
  "normalized": false,
733
  "special": true
734
  },
 
736
  "id": 32077,
737
  "content": "<extra_id_22>",
738
  "single_word": false,
739
+ "lstrip": false,
740
+ "rstrip": false,
741
  "normalized": false,
742
  "special": true
743
  },
 
745
  "id": 32078,
746
  "content": "<extra_id_21>",
747
  "single_word": false,
748
+ "lstrip": false,
749
+ "rstrip": false,
750
  "normalized": false,
751
  "special": true
752
  },
 
754
  "id": 32079,
755
  "content": "<extra_id_20>",
756
  "single_word": false,
757
+ "lstrip": false,
758
+ "rstrip": false,
759
  "normalized": false,
760
  "special": true
761
  },
 
763
  "id": 32080,
764
  "content": "<extra_id_19>",
765
  "single_word": false,
766
+ "lstrip": false,
767
+ "rstrip": false,
768
  "normalized": false,
769
  "special": true
770
  },
 
772
  "id": 32081,
773
  "content": "<extra_id_18>",
774
  "single_word": false,
775
+ "lstrip": false,
776
+ "rstrip": false,
777
  "normalized": false,
778
  "special": true
779
  },
 
781
  "id": 32082,
782
  "content": "<extra_id_17>",
783
  "single_word": false,
784
+ "lstrip": false,
785
+ "rstrip": false,
786
  "normalized": false,
787
  "special": true
788
  },
 
790
  "id": 32083,
791
  "content": "<extra_id_16>",
792
  "single_word": false,
793
+ "lstrip": false,
794
+ "rstrip": false,
795
  "normalized": false,
796
  "special": true
797
  },
 
799
  "id": 32084,
800
  "content": "<extra_id_15>",
801
  "single_word": false,
802
+ "lstrip": false,
803
+ "rstrip": false,
804
  "normalized": false,
805
  "special": true
806
  },
 
808
  "id": 32085,
809
  "content": "<extra_id_14>",
810
  "single_word": false,
811
+ "lstrip": false,
812
+ "rstrip": false,
813
  "normalized": false,
814
  "special": true
815
  },
 
817
  "id": 32086,
818
  "content": "<extra_id_13>",
819
  "single_word": false,
820
+ "lstrip": false,
821
+ "rstrip": false,
822
  "normalized": false,
823
  "special": true
824
  },
 
826
  "id": 32087,
827
  "content": "<extra_id_12>",
828
  "single_word": false,
829
+ "lstrip": false,
830
+ "rstrip": false,
831
  "normalized": false,
832
  "special": true
833
  },
 
835
  "id": 32088,
836
  "content": "<extra_id_11>",
837
  "single_word": false,
838
+ "lstrip": false,
839
+ "rstrip": false,
840
  "normalized": false,
841
  "special": true
842
  },
 
844
  "id": 32089,
845
  "content": "<extra_id_10>",
846
  "single_word": false,
847
+ "lstrip": false,
848
+ "rstrip": false,
849
  "normalized": false,
850
  "special": true
851
  },
 
853
  "id": 32090,
854
  "content": "<extra_id_9>",
855
  "single_word": false,
856
+ "lstrip": false,
857
+ "rstrip": false,
858
  "normalized": false,
859
  "special": true
860
  },
 
862
  "id": 32091,
863
  "content": "<extra_id_8>",
864
  "single_word": false,
865
+ "lstrip": false,
866
+ "rstrip": false,
867
  "normalized": false,
868
  "special": true
869
  },
 
871
  "id": 32092,
872
  "content": "<extra_id_7>",
873
  "single_word": false,
874
+ "lstrip": false,
875
+ "rstrip": false,
876
  "normalized": false,
877
  "special": true
878
  },
 
880
  "id": 32093,
881
  "content": "<extra_id_6>",
882
  "single_word": false,
883
+ "lstrip": false,
884
+ "rstrip": false,
885
  "normalized": false,
886
  "special": true
887
  },
 
889
  "id": 32094,
890
  "content": "<extra_id_5>",
891
  "single_word": false,
892
+ "lstrip": false,
893
+ "rstrip": false,
894
  "normalized": false,
895
  "special": true
896
  },
 
898
  "id": 32095,
899
  "content": "<extra_id_4>",
900
  "single_word": false,
901
+ "lstrip": false,
902
+ "rstrip": false,
903
  "normalized": false,
904
  "special": true
905
  },
 
907
  "id": 32096,
908
  "content": "<extra_id_3>",
909
  "single_word": false,
910
+ "lstrip": false,
911
+ "rstrip": false,
912
  "normalized": false,
913
  "special": true
914
  },
 
916
  "id": 32097,
917
  "content": "<extra_id_2>",
918
  "single_word": false,
919
+ "lstrip": false,
920
+ "rstrip": false,
921
  "normalized": false,
922
  "special": true
923
  },
 
925
  "id": 32098,
926
  "content": "<extra_id_1>",
927
  "single_word": false,
928
+ "lstrip": false,
929
+ "rstrip": false,
930
  "normalized": false,
931
  "special": true
932
  },
 
934
  "id": 32099,
935
  "content": "<extra_id_0>",
936
  "single_word": false,
937
+ "lstrip": false,
938
+ "rstrip": false,
939
  "normalized": false,
940
  "special": true
941
  },
 
27608
  },
27609
  {
27610
  "id": 35063,
27611
+ "content": "<bom>",
27612
  "single_word": false,
27613
+ "lstrip": false,
27614
+ "rstrip": false,
27615
  "normalized": false,
27616
  "special": true
27617
  },
27618
  {
27619
  "id": 35064,
27620
+ "content": "<eom>",
27621
  "single_word": false,
27622
+ "lstrip": false,
27623
+ "rstrip": false,
27624
  "normalized": false,
27625
  "special": true
27626
  },
27627
  {
27628
  "id": 35065,
27629
+ "content": "<bop>",
27630
  "single_word": false,
27631
+ "lstrip": false,
27632
+ "rstrip": false,
27633
  "normalized": false,
27634
  "special": true
27635
  },
27636
  {
27637
  "id": 35066,
27638
+ "content": "<eop>",
27639
  "single_word": false,
27640
+ "lstrip": false,
27641
+ "rstrip": false,
27642
  "normalized": false,
27643
  "special": true
27644
  },
27645
  {
27646
  "id": 35067,
27647
+ "content": "MOLECULE NAME",
27648
  "single_word": false,
27649
+ "lstrip": false,
27650
+ "rstrip": false,
27651
  "normalized": false,
27652
  "special": true
27653
  },
 
27655
  "id": 35068,
27656
  "content": "DESCRIPTION",
27657
  "single_word": false,
27658
+ "lstrip": false,
27659
+ "rstrip": false,
27660
  "normalized": false,
27661
  "special": true
27662
  },
 
27664
  "id": 35069,
27665
  "content": "PROTEIN NAME",
27666
  "single_word": false,
27667
+ "lstrip": false,
27668
+ "rstrip": false,
27669
  "normalized": false,
27670
  "special": true
27671
  },
 
27673
  "id": 35070,
27674
  "content": "FUNCTION",
27675
  "single_word": false,
27676
+ "lstrip": false,
27677
+ "rstrip": false,
27678
  "normalized": false,
27679
  "special": true
27680
  },
27681
  {
27682
  "id": 35071,
27683
+ "content": "SUBCELLULAR LOCATION",
27684
  "single_word": false,
27685
+ "lstrip": false,
27686
+ "rstrip": false,
27687
  "normalized": false,
27688
  "special": true
27689
  },
27690
  {
27691
  "id": 35072,
27692
+ "content": "PROTEIN FAMILIES",
27693
  "single_word": false,
27694
+ "lstrip": false,
27695
+ "rstrip": false,
27696
  "normalized": false,
27697
  "special": true
27698
  }
 
156181
  "<extra_id_0>",
156182
  0.0
156183
  ]
156184
+ ]
 
156185
  }
156186
  }
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff