RaushanTurganbay HF staff committed on
Commit
d0de11f
·
verified ·
1 Parent(s): 75a72ea

Upload processor

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 1024
3
+ }
preprocessor_config.json CHANGED
@@ -15,6 +15,7 @@
15
  0.26130258,
16
  0.27577711
17
  ],
 
18
  "resample": 3,
19
  "rescale_factor": 0.00392156862745098,
20
  "size": {
 
15
  0.26130258,
16
  0.27577711
17
  ],
18
+ "processor_class": "Blip2Processor",
19
  "resample": 3,
20
  "rescale_factor": 0.00392156862745098,
21
  "size": {
processor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "num_query_tokens": 10,
3
+ "processor_class": "Blip2Processor"
4
+ }
tokenizer.json CHANGED
@@ -20,6 +20,15 @@
20
  "rstrip": false,
21
  "normalized": true,
22
  "special": true
 
 
 
 
 
 
 
 
 
23
  }
24
  ],
25
  "normalizer": null,
@@ -90,6 +99,8 @@
90
  "continuing_subword_prefix": "",
91
  "end_of_word_suffix": "",
92
  "fuse_unk": false,
 
 
93
  "vocab": {
94
  "<pad>": 0,
95
  "</s>": 1,
@@ -1117,772 +1128,3070 @@
1117
  "Ġbegan": 1023
1118
  },
1119
  "merges": [
1120
- "Ġ t",
1121
- "h e",
1122
- "Ġ a",
1123
- "i n",
1124
- "Ġt he",
1125
- "e r",
1126
- "o n",
1127
- "Ġ ,",
1128
- "r e",
1129
- "Ġ s",
1130
- "e d",
1131
- "Ġ o",
1132
- "Ġ w",
1133
- "n d",
1134
- "a t",
1135
- "Ġ .",
1136
- "o r",
1137
- "i t",
1138
- "Ġ c",
1139
- "e n",
1140
- "Ġ f",
1141
- "i s",
1142
- "e s",
1143
- "a r",
1144
- "Ġo f",
1145
- "Ġ b",
1146
- "a n",
1147
- "Ġ in",
1148
- "a l",
1149
- "in g",
1150
- "Ġ p",
1151
- "Ġa nd",
1152
- "a s",
1153
- "Ġt o",
1154
- "r o",
1155
- "i c",
1156
- "Ġ m",
1157
- "Ġ d",
1158
- "Ġ h",
1159
- "i on",
1160
- "l e",
1161
- "o u",
1162
- "Ġ T",
1163
- "Ġ re",
1164
- "Ġ =",
1165
- "Ġ \"",
1166
- "Ġ A",
1167
- "Ġ S",
1168
- "en t",
1169
- "i l",
1170
- "Ġt h",
1171
- "Ġ 1",
1172
- "s t",
1173
- "Ġ C",
1174
- "e l",
1175
- "o m",
1176
- "Ġ l",
1177
- "a m",
1178
- "Ġ Ċ",
1179
- "Ġ e",
1180
- "Ġ n",
1181
- "Ġ @",
1182
- "a d",
1183
- "a c",
1184
- "Ġw as",
1185
- "Ġ M",
1186
- "u r",
1187
- "ĠT he",
1188
- "e c",
1189
- "Ġ on",
1190
- "l y",
1191
- "Ġ B",
1192
- "Ġ I",
1193
- "Ġ g",
1194
- "Ġ '",
1195
- "e t",
1196
- "o l",
1197
- "i d",
1198
- "i v",
1199
- "i m",
1200
- "Ġf or",
1201
- "i r",
1202
- "- @",
1203
- "Ġ@ -@",
1204
- "i g",
1205
- "o t",
1206
- "t er",
1207
- "Ġa s",
1208
- "Ġ H",
1209
- "u s",
1210
- "o w",
1211
- "Ġs t",
1212
- "u t",
1213
- "it h",
1214
- "a y",
1215
- "Ġ 2",
1216
- "Ġ P",
1217
- "at ion",
1218
- "v er",
1219
- "Ġb e",
1220
- "he r",
1221
- "Ġth at",
1222
- "Ġw ith",
1223
- "Ġ R",
1224
- "c e",
1225
- "t h",
1226
- "Ġ D",
1227
- "Ġ is",
1228
- "u n",
1229
- "e m",
1230
- "Ġ F",
1231
- "Ġw h",
1232
- "u l",
1233
- "Ġb y",
1234
- "Ġa l",
1235
- "c h",
1236
- "Ġ )",
1237
- "Ġ (",
1238
- "Ġ W",
1239
- "Ġc on",
1240
- "r a",
1241
- "Ġ G",
1242
- "o s",
1243
- "Ġ L",
1244
- "Ġ N",
1245
- "Ġa t",
1246
- "er s",
1247
- "c t",
1248
- "Ġ it",
1249
- "Ġ1 9",
1250
- "ro m",
1251
- "a nd",
1252
- "Ġa n",
1253
- "u m",
1254
- "es t",
1255
- "Ġ J",
1256
- "a g",
1257
- "Ġ he",
1258
- "0 0",
1259
- "is t",
1260
- "a in",
1261
- "o d",
1262
- "a v",
1263
- "r i",
1264
- "Ġ E",
1265
- "Ġ O",
1266
- "Ġf rom",
1267
- "Ġc om",
1268
- "Ġh is",
1269
- "o p",
1270
- "Ġp ro",
1271
- "re s",
1272
- "i es",
1273
- "i f",
1274
- "Ġ v",
1275
- "or t",
1276
- "er e",
1277
- "il l",
1278
- "l d",
1279
- "Ġd e",
1280
- "p p",
1281
- "Ġs u",
1282
- "o re",
1283
- "ĠI n",
1284
- "Ġ r",
1285
- "Ġs e",
1286
- "Ġw ere",
1287
- "e w",
1288
- "on g",
1289
- "ig h",
1290
- "ar d",
1291
- "at e",
1292
- "al l",
1293
- "ar t",
1294
- "a k",
1295
- "ic h",
1296
- "Ġc h",
1297
- "Ġo r",
1298
- "a b",
1299
- "an t",
1300
- "u d",
1301
- "o c",
1302
- "b er",
1303
- "Ġe x",
1304
- "g h",
1305
- "it y",
1306
- "at ed",
1307
- "p t",
1308
- "es s",
1309
- "e ar",
1310
- "Ġ K",
1311
- "Ġp l",
1312
- "am e",
1313
- "q u",
1314
- "iv e",
1315
- "ro u",
1316
- "Ġa re",
1317
- "Ġ â",
1318
- "Ġs h",
1319
- "Ġ k",
1320
- "ac k",
1321
- "ec t",
1322
- "Ġâ Ģ",
1323
- "Ġ U",
1324
- "Ġh ad",
1325
- "s e",
1326
- "Ġwh ich",
1327
- "re d",
1328
- "o v",
1329
- "ĠS t",
1330
- "as t",
1331
- "Ġs p",
1332
- "i an",
1333
- "Ġ y",
1334
- "m ent",
1335
- "Ġ le",
1336
- "Ġn ot",
1337
- "g e",
1338
- "or d",
1339
- "r it",
1340
- "i p",
1341
- "in e",
1342
- "el l",
1343
- "al ly",
1344
- "ou r",
1345
- "o st",
1346
- "igh t",
1347
- "t her",
1348
- "a p",
1349
- "Ġ u",
1350
- "is h",
1351
- "ĠC h",
1352
- "ou n",
1353
- "i a",
1354
- "Ġ 3",
1355
- "av e",
1356
- "ar y",
1357
- "u st",
1358
- "o g",
1359
- "Ġ2 00",
1360
- "Ġ un",
1361
- "ou s",
1362
- "ir st",
1363
- "Ġ V",
1364
- "c c",
1365
- "Ġin c",
1366
- "Ġ ;",
1367
- "Ġcom p",
1368
- "r u",
1369
- "ion s",
1370
- "Ġthe ir",
1371
- "Ġb ut",
1372
- "id e",
1373
- "u re",
1374
- "s o",
1375
- "Ġcon t",
1376
- "Ġin t",
1377
- "f ter",
1378
- "ic al",
1379
- "i al",
1380
- "Ġa r",
1381
- "Ġf irst",
1382
- "ou ld",
1383
- "Ġit s",
1384
- "he d",
1385
- "ĠâĢ ĵ",
1386
- "Ġw he",
1387
- "w o",
1388
- "ou t",
1389
- "u b",
1390
- "Ġ2 0",
1391
- "f f",
1392
- "Ġ :",
1393
- "u e",
1394
- "Ġ her",
1395
- "ow n",
1396
- "o k",
1397
- "Ġal so",
1398
- "Ġc l",
1399
- "p er",
1400
- "ig n",
1401
- "at er",
1402
- "r an",
1403
- "or m",
1404
- "i e",
1405
- "om e",
1406
- "or k",
1407
- "as s",
1408
- "i re",
1409
- "e nd",
1410
- "Ġre s",
1411
- "Ġa b",
1412
- "Ġa d",
1413
- "Ġ us",
1414
- "r y",
1415
- "Ġre c",
1416
- "Ġh ave",
1417
- "ag e",
1418
- "ĠH e",
1419
- "Ġ 4",
1420
- "Ġ ro",
1421
- "m er",
1422
- "Ġon e",
1423
- "on d",
1424
- "l ow",
1425
- "Ġh as",
1426
- "ĠT h",
1427
- "d u",
1428
- "Ġ 5",
1429
- "Ġp er",
1430
- "Ġbe en",
1431
- "im e",
1432
- "Ġt wo",
1433
- "en ce",
1434
- "l and",
1435
- "Ġ1 8",
1436
- ". @",
1437
- "Ġ@ .@",
1438
- "ul t",
1439
- "re e",
1440
- "ou gh",
1441
- "i le",
1442
- "Ġwh o",
1443
- "ĠA l",
1444
- "Ġs c",
1445
- "ur ing",
1446
- "p l",
1447
- "or y",
1448
- "it ion",
1449
- "r ic",
1450
- "ation s",
1451
- "Ġd is",
1452
- "Ġth is",
1453
- "Ġb ec",
1454
- "Ġa pp",
1455
- "i z",
1456
- "ĠI t",
1457
- "a re",
1458
- "ac h",
1459
- "l ud",
1460
- "ad e",
1461
- "Ġpl ay",
1462
- "Ġ j",
1463
- "Ġm an",
1464
- "ac t",
1465
- "el y",
1466
- "Ġp art",
1467
- "Ġd es",
1468
- "Ġa g",
1469
- "Ġthe y",
1470
- "Ġy ear",
1471
- "oun t",
1472
- "Ġ20 1",
1473
- "Ġo ver",
1474
- "Ġo ther",
1475
- "ou nd",
1476
- "Ġa fter",
1477
- "i b",
1478
- "o ver",
1479
- "Ġs er",
1480
- "Ġ en",
1481
- "Ġof f",
1482
- "Ġ im",
1483
- "ct ion",
1484
- "Ġ Y",
1485
- "k e",
1486
- "it e",
1487
- ", @",
1488
- "Ġ@ ,@",
1489
- "t e",
1490
- "ur n",
1491
- "Ġinc lud",
1492
- "res s",
1493
- "an ce",
1494
- "an g",
1495
- "Ġat t",
1496
- "ic e",
1497
- "ac e",
1498
- "ar k",
1499
- "Ġo ut",
1500
- "w n",
1501
- "p h",
1502
- "em ber",
1503
- "Ġp re",
1504
- "Ġu p",
1505
- "en s",
1506
- "m an",
1507
- "Ġe v",
1508
- "Ġt ime",
1509
- "nd er",
1510
- "rou gh",
1511
- "c ed",
1512
- "Ġf in",
1513
- "Ġint o",
1514
- "on e",
1515
- "p ort",
1516
- "rou nd",
1517
- "w e",
1518
- "re n",
1519
- "l es",
1520
- "in t",
1521
- "ĠO n",
1522
- "v el",
1523
- "Ġcom m",
1524
- "Ġs he",
1525
- "as on",
1526
- "am p",
1527
- "Ġt e",
1528
- "Ġw ould",
1529
- "w ard",
1530
- "Ġm ore",
1531
- "Ġ 6",
1532
- "i ed",
1533
- "os e",
1534
- "ri b",
1535
- "ĠU n",
1536
- "Ġal l",
1537
- "ing s",
1538
- "ter n",
1539
- "c es",
1540
- "ab le",
1541
- "Ġw e",
1542
- "it ed",
1543
- "e ver",
1544
- "ent s",
1545
- "Ġh im",
1546
- "as ed",
1547
- "or s",
1548
- "o y",
1549
- "o od",
1550
- "Ġc ent",
1551
- "i x",
1552
- "as e",
1553
- "il d",
1554
- "ĠA n",
1555
- "Ġ 7",
1556
- "Ġw ork",
1557
- "at es",
1558
- "i ous",
1559
- "at h",
1560
- "Ġp o",
1561
- "ro p",
1562
- "ol d",
1563
- "al s",
1564
- "is s",
1565
- "e y",
1566
- "ic t",
1567
- "Ġf e",
1568
- "Ġthe m",
1569
- "g an",
1570
- "Ġs ec",
1571
- "Ġb et",
1572
- "Ġwhe n",
1573
- "Ġs ong",
1574
- "Ġre m",
1575
- "e p",
1576
- "f orm",
1577
- "a il",
1578
- "f er",
1579
- "Ġe ar",
1580
- "ub l",
1581
- "a w",
1582
- "Ġk n",
1583
- "ak e",
1584
- "a us",
1585
- "Ġm ost",
1586
- "Ġcon s",
1587
- "Ġd uring",
1588
- "ĠA s",
1589
- "or th",
1590
- "Ġn ew",
1591
- "er ed",
1592
- "il m",
1593
- "v ed",
1594
- "at t",
1595
- "Ġon ly",
1596
- "Ġ 9",
1597
- "Ġd ec",
1598
- "Ġ 8",
1599
- "ic k",
1600
- "Ġg ame",
1601
- "on s",
1602
- "u g",
1603
- "Ġt r",
1604
- "f t",
1605
- "ot h",
1606
- "o ok",
1607
- "ĠM ar",
1608
- "re at",
1609
- "w ay",
1610
- "Ġc an",
1611
- "ol low",
1612
- "ou th",
1613
- "we en",
1614
- "ĠE n",
1615
- "Ġ19 9",
1616
- "ter s",
1617
- "Ġre l",
1618
- "in d",
1619
- "Ġab out",
1620
- "Ġse ason",
1621
- "Ġag ain",
1622
- "r al",
1623
- "Ġth ree",
1624
- "ation al",
1625
- "Ġu nder",
1626
- "ul ar",
1627
- "Ġm e",
1628
- "Ġth an",
1629
- "ĠC om",
1630
- "ĠA r",
1631
- "h ip",
1632
- "o b",
1633
- "Ġn e",
1634
- "Ġbet ween",
1635
- "Ġf l",
1636
- "h n",
1637
- "v e",
1638
- "Ġch ar",
1639
- "Ġc ol",
1640
- "Ġrec ord",
1641
- "i ew",
1642
- "r on",
1643
- "f ore",
1644
- "Ġth rough",
1645
- "is ion",
1646
- "or n",
1647
- "Ġ 00",
1648
- "oc k",
1649
- "Ġ ver",
1650
- "Ġl ater",
1651
- "Ġn um",
1652
- "Ġe nd",
1653
- "ol og",
1654
- "am es",
1655
- "Ġp os",
1656
- "Ġw rit",
1657
- "Ġpro du",
1658
- "Ġwh ile",
1659
- "Ġa ct",
1660
- "Ġre le",
1661
- "Ġf ilm",
1662
- "is hed",
1663
- "Ġp r",
1664
- "an s",
1665
- "Ġre g",
1666
- "Ġfor m",
1667
- "Ġas s",
1668
- "ĠS e",
1669
- "ur y",
1670
- "t ed",
1671
- "t s",
1672
- "Ġm ade",
1673
- "Ġsu b",
1674
- "Ġp e",
1675
- "Ġs o",
1676
- "or ld",
1677
- "Ġre t",
1678
- "ĠN ew",
1679
- "Ġsp ec",
1680
- "Ġa cc",
1681
- "Ġ qu",
1682
- "Ġwhe re",
1683
- "en er",
1684
- "Ġm ov",
1685
- "he s",
1686
- "mer ic",
1687
- "at ing",
1688
- "Ġin ter",
1689
- "ĠL e",
1690
- "ĠA meric",
1691
- "Ġ ra",
1692
- "Ġs ome",
1693
- "Ġc o",
1694
- "Ġl ar",
1695
- "Ġb u",
1696
- "Ġde f",
1697
- "b um",
1698
- "Ġa c",
1699
- "Ġm us",
1700
- "Ġf ollow",
1701
- "ĠA t",
1702
- "in s",
1703
- "iv ed",
1704
- "if ic",
1705
- "u al",
1706
- "Ġa m",
1707
- "Ġsu ch",
1708
- "Ġsec ond",
1709
- "i ke",
1710
- "Ġf our",
1711
- "Ġin d",
1712
- "an n",
1713
- "he n",
1714
- "Ġus ed",
1715
- "ĠR e",
1716
- "ic s",
1717
- "le ct",
1718
- "Ġd ay",
1719
- "i el",
1720
- "il y",
1721
- "ĠTh is",
1722
- "Ġ 0",
1723
- "Ġp ubl",
1724
- "Ġc all",
1725
- "ĠJ o",
1726
- "l l",
1727
- "Ġal bum",
1728
- "Ġ00 0",
1729
- "ran s",
1730
- "Ġd o",
1731
- "an y",
1732
- "Ġbe fore",
1733
- "ro s",
1734
- "ĠS h",
1735
- "Ġs y",
1736
- "a id",
1737
- "ĠEn g",
1738
- "Ġbe ing",
1739
- "Ġ1 0",
1740
- "u c",
1741
- "Ġe p",
1742
- "Ġsu pp",
1743
- "Ġthe re",
1744
- "Ġyear s",
1745
- "ar s",
1746
- "ow ever",
1747
- "Ġ ent",
1748
- "if e",
1749
- "Ġh igh",
1750
- "Ġf ound",
1751
- "ir d",
1752
- "Ġn o",
1753
- "Ġs et",
1754
- "in es",
1755
- "iv er",
1756
- "i o",
1757
- "ot her",
1758
- "j ect",
1759
- "Ġs ur",
1760
- "a j",
1761
- "t en",
1762
- "Ġt ra",
1763
- "Ġ1 2",
1764
- "is ed",
1765
- "it ies",
1766
- "vel op",
1767
- "Ġb l",
1768
- "al e",
1769
- "Ġser ies",
1770
- "Ġl oc",
1771
- "Ġnum ber",
1772
- "Ġp res",
1773
- "an e",
1774
- "aus e",
1775
- "od e",
1776
- "e k",
1777
- "t on",
1778
- "ĠS c",
1779
- "i er",
1780
- "is e",
1781
- "Ġse ver",
1782
- "in ce",
1783
- "Ġb oth",
1784
- "an k",
1785
- "ro w",
1786
- "ire ct",
1787
- "s on",
1788
- "Ġthe n",
1789
- "ĠB rit",
1790
- "i et",
1791
- "Ġ1 6",
1792
- "Ġep is",
1793
- "Ġinclud ing",
1794
- "it s",
1795
- "ig in",
1796
- "p r",
1797
- "Ġ /",
1798
- "Ġagain st",
1799
- "Ġw ell",
1800
- "Ġbec ame",
1801
- "Ġex p",
1802
- "Ġkn own",
1803
- "Ġt rans",
1804
- "Ġchar ac",
1805
- "ĠâĢ Ķ",
1806
- "r am",
1807
- "Ġb ack",
1808
- "Ġad d",
1809
- "Ġp op",
1810
- "Ġg o",
1811
- "ur ch",
1812
- "Ġdes c",
1813
- "Ġs ing",
1814
- "iel d",
1815
- "Ġper form",
1816
- "ain ed",
1817
- "Ġre ce",
1818
- "id ent",
1819
- "Ġe m",
1820
- "er t",
1821
- "u res",
1822
- "Ġin v",
1823
- "Ġde p",
1824
- "Ġ19 8",
1825
- "a ir",
1826
- "er n",
1827
- "at her",
1828
- "f ul",
1829
- "Ġ Z",
1830
- "Ġm on",
1831
- "Ġman y",
1832
- "Ġm ain",
1833
- "Ġst ud",
1834
- "Ġl ong",
1835
- "in n",
1836
- "th ough",
1837
- "u p",
1838
- "o ol",
1839
- "ĠUn ited",
1840
- "l ed",
1841
- "em ent",
1842
- "Ġ1 5",
1843
- "ow er",
1844
- "ĠJo hn",
1845
- "Ġo p",
1846
- "Ġ1 1",
1847
- "in ed",
1848
- "Ġm et",
1849
- "o ber",
1850
- "le y",
1851
- "Ġ1 7",
1852
- "Ġcent ury",
1853
- "Ġte am",
1854
- "Ġ est",
1855
- "ĠA fter",
1856
- "y l",
1857
- "Ġm in",
1858
- "u ch",
1859
- "ut e",
1860
- "Ġde velop",
1861
- "ĠS he",
1862
- "i am",
1863
- "Ġsh ow",
1864
- "el f",
1865
- "Ġre p",
1866
- "Ġcon c",
1867
- "at ive",
1868
- "Ġc re",
1869
- "over n",
1870
- "a red",
1871
- "Ġ19 4",
1872
- "Ġor igin",
1873
- "Ġs m",
1874
- "iv ers",
1875
- "a z",
1876
- "Ġle ad",
1877
- "Ġsever al",
1878
- "a h",
1879
- "Ġo b",
1880
- "Ġre v",
1881
- "Ġm ill",
1882
- "er m",
1883
- "u ally",
1884
- "o ot",
1885
- "Ġbe gan"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1886
  ]
1887
  }
1888
  }
 
20
  "rstrip": false,
21
  "normalized": true,
22
  "special": true
23
+ },
24
+ {
25
+ "id": 1024,
26
+ "content": "<image>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
  }
33
  ],
34
  "normalizer": null,
 
99
  "continuing_subword_prefix": "",
100
  "end_of_word_suffix": "",
101
  "fuse_unk": false,
102
+ "byte_fallback": false,
103
+ "ignore_merges": false,
104
  "vocab": {
105
  "<pad>": 0,
106
  "</s>": 1,
 
1128
  "Ġbegan": 1023
1129
  },
1130
  "merges": [
1131
+ [
1132
+ "Ġ",
1133
+ "t"
1134
+ ],
1135
+ [
1136
+ "h",
1137
+ "e"
1138
+ ],
1139
+ [
1140
+ "Ġ",
1141
+ "a"
1142
+ ],
1143
+ [
1144
+ "i",
1145
+ "n"
1146
+ ],
1147
+ [
1148
+ "Ġt",
1149
+ "he"
1150
+ ],
1151
+ [
1152
+ "e",
1153
+ "r"
1154
+ ],
1155
+ [
1156
+ "o",
1157
+ "n"
1158
+ ],
1159
+ [
1160
+ "Ġ",
1161
+ ","
1162
+ ],
1163
+ [
1164
+ "r",
1165
+ "e"
1166
+ ],
1167
+ [
1168
+ "Ġ",
1169
+ "s"
1170
+ ],
1171
+ [
1172
+ "e",
1173
+ "d"
1174
+ ],
1175
+ [
1176
+ "Ġ",
1177
+ "o"
1178
+ ],
1179
+ [
1180
+ "Ġ",
1181
+ "w"
1182
+ ],
1183
+ [
1184
+ "n",
1185
+ "d"
1186
+ ],
1187
+ [
1188
+ "a",
1189
+ "t"
1190
+ ],
1191
+ [
1192
+ "Ġ",
1193
+ "."
1194
+ ],
1195
+ [
1196
+ "o",
1197
+ "r"
1198
+ ],
1199
+ [
1200
+ "i",
1201
+ "t"
1202
+ ],
1203
+ [
1204
+ "Ġ",
1205
+ "c"
1206
+ ],
1207
+ [
1208
+ "e",
1209
+ "n"
1210
+ ],
1211
+ [
1212
+ "Ġ",
1213
+ "f"
1214
+ ],
1215
+ [
1216
+ "i",
1217
+ "s"
1218
+ ],
1219
+ [
1220
+ "e",
1221
+ "s"
1222
+ ],
1223
+ [
1224
+ "a",
1225
+ "r"
1226
+ ],
1227
+ [
1228
+ "Ġo",
1229
+ "f"
1230
+ ],
1231
+ [
1232
+ "Ġ",
1233
+ "b"
1234
+ ],
1235
+ [
1236
+ "a",
1237
+ "n"
1238
+ ],
1239
+ [
1240
+ "Ġ",
1241
+ "in"
1242
+ ],
1243
+ [
1244
+ "a",
1245
+ "l"
1246
+ ],
1247
+ [
1248
+ "in",
1249
+ "g"
1250
+ ],
1251
+ [
1252
+ "Ġ",
1253
+ "p"
1254
+ ],
1255
+ [
1256
+ "Ġa",
1257
+ "nd"
1258
+ ],
1259
+ [
1260
+ "a",
1261
+ "s"
1262
+ ],
1263
+ [
1264
+ "Ġt",
1265
+ "o"
1266
+ ],
1267
+ [
1268
+ "r",
1269
+ "o"
1270
+ ],
1271
+ [
1272
+ "i",
1273
+ "c"
1274
+ ],
1275
+ [
1276
+ "Ġ",
1277
+ "m"
1278
+ ],
1279
+ [
1280
+ "Ġ",
1281
+ "d"
1282
+ ],
1283
+ [
1284
+ "Ġ",
1285
+ "h"
1286
+ ],
1287
+ [
1288
+ "i",
1289
+ "on"
1290
+ ],
1291
+ [
1292
+ "l",
1293
+ "e"
1294
+ ],
1295
+ [
1296
+ "o",
1297
+ "u"
1298
+ ],
1299
+ [
1300
+ "Ġ",
1301
+ "T"
1302
+ ],
1303
+ [
1304
+ "Ġ",
1305
+ "re"
1306
+ ],
1307
+ [
1308
+ "Ġ",
1309
+ "="
1310
+ ],
1311
+ [
1312
+ "Ġ",
1313
+ "\""
1314
+ ],
1315
+ [
1316
+ "Ġ",
1317
+ "A"
1318
+ ],
1319
+ [
1320
+ "Ġ",
1321
+ "S"
1322
+ ],
1323
+ [
1324
+ "en",
1325
+ "t"
1326
+ ],
1327
+ [
1328
+ "i",
1329
+ "l"
1330
+ ],
1331
+ [
1332
+ "Ġt",
1333
+ "h"
1334
+ ],
1335
+ [
1336
+ "Ġ",
1337
+ "1"
1338
+ ],
1339
+ [
1340
+ "s",
1341
+ "t"
1342
+ ],
1343
+ [
1344
+ "Ġ",
1345
+ "C"
1346
+ ],
1347
+ [
1348
+ "e",
1349
+ "l"
1350
+ ],
1351
+ [
1352
+ "o",
1353
+ "m"
1354
+ ],
1355
+ [
1356
+ "Ġ",
1357
+ "l"
1358
+ ],
1359
+ [
1360
+ "a",
1361
+ "m"
1362
+ ],
1363
+ [
1364
+ "Ġ",
1365
+ "Ċ"
1366
+ ],
1367
+ [
1368
+ "Ġ",
1369
+ "e"
1370
+ ],
1371
+ [
1372
+ "Ġ",
1373
+ "n"
1374
+ ],
1375
+ [
1376
+ "Ġ",
1377
+ "@"
1378
+ ],
1379
+ [
1380
+ "a",
1381
+ "d"
1382
+ ],
1383
+ [
1384
+ "a",
1385
+ "c"
1386
+ ],
1387
+ [
1388
+ "Ġw",
1389
+ "as"
1390
+ ],
1391
+ [
1392
+ "Ġ",
1393
+ "M"
1394
+ ],
1395
+ [
1396
+ "u",
1397
+ "r"
1398
+ ],
1399
+ [
1400
+ "ĠT",
1401
+ "he"
1402
+ ],
1403
+ [
1404
+ "e",
1405
+ "c"
1406
+ ],
1407
+ [
1408
+ "Ġ",
1409
+ "on"
1410
+ ],
1411
+ [
1412
+ "l",
1413
+ "y"
1414
+ ],
1415
+ [
1416
+ "Ġ",
1417
+ "B"
1418
+ ],
1419
+ [
1420
+ "Ġ",
1421
+ "I"
1422
+ ],
1423
+ [
1424
+ "Ġ",
1425
+ "g"
1426
+ ],
1427
+ [
1428
+ "Ġ",
1429
+ "'"
1430
+ ],
1431
+ [
1432
+ "e",
1433
+ "t"
1434
+ ],
1435
+ [
1436
+ "o",
1437
+ "l"
1438
+ ],
1439
+ [
1440
+ "i",
1441
+ "d"
1442
+ ],
1443
+ [
1444
+ "i",
1445
+ "v"
1446
+ ],
1447
+ [
1448
+ "i",
1449
+ "m"
1450
+ ],
1451
+ [
1452
+ "Ġf",
1453
+ "or"
1454
+ ],
1455
+ [
1456
+ "i",
1457
+ "r"
1458
+ ],
1459
+ [
1460
+ "-",
1461
+ "@"
1462
+ ],
1463
+ [
1464
+ "Ġ@",
1465
+ "-@"
1466
+ ],
1467
+ [
1468
+ "i",
1469
+ "g"
1470
+ ],
1471
+ [
1472
+ "o",
1473
+ "t"
1474
+ ],
1475
+ [
1476
+ "t",
1477
+ "er"
1478
+ ],
1479
+ [
1480
+ "Ġa",
1481
+ "s"
1482
+ ],
1483
+ [
1484
+ "Ġ",
1485
+ "H"
1486
+ ],
1487
+ [
1488
+ "u",
1489
+ "s"
1490
+ ],
1491
+ [
1492
+ "o",
1493
+ "w"
1494
+ ],
1495
+ [
1496
+ "Ġs",
1497
+ "t"
1498
+ ],
1499
+ [
1500
+ "u",
1501
+ "t"
1502
+ ],
1503
+ [
1504
+ "it",
1505
+ "h"
1506
+ ],
1507
+ [
1508
+ "a",
1509
+ "y"
1510
+ ],
1511
+ [
1512
+ "Ġ",
1513
+ "2"
1514
+ ],
1515
+ [
1516
+ "Ġ",
1517
+ "P"
1518
+ ],
1519
+ [
1520
+ "at",
1521
+ "ion"
1522
+ ],
1523
+ [
1524
+ "v",
1525
+ "er"
1526
+ ],
1527
+ [
1528
+ "Ġb",
1529
+ "e"
1530
+ ],
1531
+ [
1532
+ "he",
1533
+ "r"
1534
+ ],
1535
+ [
1536
+ "Ġth",
1537
+ "at"
1538
+ ],
1539
+ [
1540
+ "Ġw",
1541
+ "ith"
1542
+ ],
1543
+ [
1544
+ "Ġ",
1545
+ "R"
1546
+ ],
1547
+ [
1548
+ "c",
1549
+ "e"
1550
+ ],
1551
+ [
1552
+ "t",
1553
+ "h"
1554
+ ],
1555
+ [
1556
+ "Ġ",
1557
+ "D"
1558
+ ],
1559
+ [
1560
+ "Ġ",
1561
+ "is"
1562
+ ],
1563
+ [
1564
+ "u",
1565
+ "n"
1566
+ ],
1567
+ [
1568
+ "e",
1569
+ "m"
1570
+ ],
1571
+ [
1572
+ "Ġ",
1573
+ "F"
1574
+ ],
1575
+ [
1576
+ "Ġw",
1577
+ "h"
1578
+ ],
1579
+ [
1580
+ "u",
1581
+ "l"
1582
+ ],
1583
+ [
1584
+ "Ġb",
1585
+ "y"
1586
+ ],
1587
+ [
1588
+ "Ġa",
1589
+ "l"
1590
+ ],
1591
+ [
1592
+ "c",
1593
+ "h"
1594
+ ],
1595
+ [
1596
+ "Ġ",
1597
+ ")"
1598
+ ],
1599
+ [
1600
+ "Ġ",
1601
+ "("
1602
+ ],
1603
+ [
1604
+ "Ġ",
1605
+ "W"
1606
+ ],
1607
+ [
1608
+ "Ġc",
1609
+ "on"
1610
+ ],
1611
+ [
1612
+ "r",
1613
+ "a"
1614
+ ],
1615
+ [
1616
+ "Ġ",
1617
+ "G"
1618
+ ],
1619
+ [
1620
+ "o",
1621
+ "s"
1622
+ ],
1623
+ [
1624
+ "Ġ",
1625
+ "L"
1626
+ ],
1627
+ [
1628
+ "Ġ",
1629
+ "N"
1630
+ ],
1631
+ [
1632
+ "Ġa",
1633
+ "t"
1634
+ ],
1635
+ [
1636
+ "er",
1637
+ "s"
1638
+ ],
1639
+ [
1640
+ "c",
1641
+ "t"
1642
+ ],
1643
+ [
1644
+ "Ġ",
1645
+ "it"
1646
+ ],
1647
+ [
1648
+ "Ġ1",
1649
+ "9"
1650
+ ],
1651
+ [
1652
+ "ro",
1653
+ "m"
1654
+ ],
1655
+ [
1656
+ "a",
1657
+ "nd"
1658
+ ],
1659
+ [
1660
+ "Ġa",
1661
+ "n"
1662
+ ],
1663
+ [
1664
+ "u",
1665
+ "m"
1666
+ ],
1667
+ [
1668
+ "es",
1669
+ "t"
1670
+ ],
1671
+ [
1672
+ "Ġ",
1673
+ "J"
1674
+ ],
1675
+ [
1676
+ "a",
1677
+ "g"
1678
+ ],
1679
+ [
1680
+ "Ġ",
1681
+ "he"
1682
+ ],
1683
+ [
1684
+ "0",
1685
+ "0"
1686
+ ],
1687
+ [
1688
+ "is",
1689
+ "t"
1690
+ ],
1691
+ [
1692
+ "a",
1693
+ "in"
1694
+ ],
1695
+ [
1696
+ "o",
1697
+ "d"
1698
+ ],
1699
+ [
1700
+ "a",
1701
+ "v"
1702
+ ],
1703
+ [
1704
+ "r",
1705
+ "i"
1706
+ ],
1707
+ [
1708
+ "Ġ",
1709
+ "E"
1710
+ ],
1711
+ [
1712
+ "Ġ",
1713
+ "O"
1714
+ ],
1715
+ [
1716
+ "Ġf",
1717
+ "rom"
1718
+ ],
1719
+ [
1720
+ "Ġc",
1721
+ "om"
1722
+ ],
1723
+ [
1724
+ "Ġh",
1725
+ "is"
1726
+ ],
1727
+ [
1728
+ "o",
1729
+ "p"
1730
+ ],
1731
+ [
1732
+ "Ġp",
1733
+ "ro"
1734
+ ],
1735
+ [
1736
+ "re",
1737
+ "s"
1738
+ ],
1739
+ [
1740
+ "i",
1741
+ "es"
1742
+ ],
1743
+ [
1744
+ "i",
1745
+ "f"
1746
+ ],
1747
+ [
1748
+ "Ġ",
1749
+ "v"
1750
+ ],
1751
+ [
1752
+ "or",
1753
+ "t"
1754
+ ],
1755
+ [
1756
+ "er",
1757
+ "e"
1758
+ ],
1759
+ [
1760
+ "il",
1761
+ "l"
1762
+ ],
1763
+ [
1764
+ "l",
1765
+ "d"
1766
+ ],
1767
+ [
1768
+ "Ġd",
1769
+ "e"
1770
+ ],
1771
+ [
1772
+ "p",
1773
+ "p"
1774
+ ],
1775
+ [
1776
+ "Ġs",
1777
+ "u"
1778
+ ],
1779
+ [
1780
+ "o",
1781
+ "re"
1782
+ ],
1783
+ [
1784
+ "ĠI",
1785
+ "n"
1786
+ ],
1787
+ [
1788
+ "Ġ",
1789
+ "r"
1790
+ ],
1791
+ [
1792
+ "Ġs",
1793
+ "e"
1794
+ ],
1795
+ [
1796
+ "Ġw",
1797
+ "ere"
1798
+ ],
1799
+ [
1800
+ "e",
1801
+ "w"
1802
+ ],
1803
+ [
1804
+ "on",
1805
+ "g"
1806
+ ],
1807
+ [
1808
+ "ig",
1809
+ "h"
1810
+ ],
1811
+ [
1812
+ "ar",
1813
+ "d"
1814
+ ],
1815
+ [
1816
+ "at",
1817
+ "e"
1818
+ ],
1819
+ [
1820
+ "al",
1821
+ "l"
1822
+ ],
1823
+ [
1824
+ "ar",
1825
+ "t"
1826
+ ],
1827
+ [
1828
+ "a",
1829
+ "k"
1830
+ ],
1831
+ [
1832
+ "ic",
1833
+ "h"
1834
+ ],
1835
+ [
1836
+ "Ġc",
1837
+ "h"
1838
+ ],
1839
+ [
1840
+ "Ġo",
1841
+ "r"
1842
+ ],
1843
+ [
1844
+ "a",
1845
+ "b"
1846
+ ],
1847
+ [
1848
+ "an",
1849
+ "t"
1850
+ ],
1851
+ [
1852
+ "u",
1853
+ "d"
1854
+ ],
1855
+ [
1856
+ "o",
1857
+ "c"
1858
+ ],
1859
+ [
1860
+ "b",
1861
+ "er"
1862
+ ],
1863
+ [
1864
+ "Ġe",
1865
+ "x"
1866
+ ],
1867
+ [
1868
+ "g",
1869
+ "h"
1870
+ ],
1871
+ [
1872
+ "it",
1873
+ "y"
1874
+ ],
1875
+ [
1876
+ "at",
1877
+ "ed"
1878
+ ],
1879
+ [
1880
+ "p",
1881
+ "t"
1882
+ ],
1883
+ [
1884
+ "es",
1885
+ "s"
1886
+ ],
1887
+ [
1888
+ "e",
1889
+ "ar"
1890
+ ],
1891
+ [
1892
+ "Ġ",
1893
+ "K"
1894
+ ],
1895
+ [
1896
+ "Ġp",
1897
+ "l"
1898
+ ],
1899
+ [
1900
+ "am",
1901
+ "e"
1902
+ ],
1903
+ [
1904
+ "q",
1905
+ "u"
1906
+ ],
1907
+ [
1908
+ "iv",
1909
+ "e"
1910
+ ],
1911
+ [
1912
+ "ro",
1913
+ "u"
1914
+ ],
1915
+ [
1916
+ "Ġa",
1917
+ "re"
1918
+ ],
1919
+ [
1920
+ "Ġ",
1921
+ "â"
1922
+ ],
1923
+ [
1924
+ "Ġs",
1925
+ "h"
1926
+ ],
1927
+ [
1928
+ "Ġ",
1929
+ "k"
1930
+ ],
1931
+ [
1932
+ "ac",
1933
+ "k"
1934
+ ],
1935
+ [
1936
+ "ec",
1937
+ "t"
1938
+ ],
1939
+ [
1940
+ "Ġâ",
1941
+ "Ģ"
1942
+ ],
1943
+ [
1944
+ "Ġ",
1945
+ "U"
1946
+ ],
1947
+ [
1948
+ "Ġh",
1949
+ "ad"
1950
+ ],
1951
+ [
1952
+ "s",
1953
+ "e"
1954
+ ],
1955
+ [
1956
+ "Ġwh",
1957
+ "ich"
1958
+ ],
1959
+ [
1960
+ "re",
1961
+ "d"
1962
+ ],
1963
+ [
1964
+ "o",
1965
+ "v"
1966
+ ],
1967
+ [
1968
+ "ĠS",
1969
+ "t"
1970
+ ],
1971
+ [
1972
+ "as",
1973
+ "t"
1974
+ ],
1975
+ [
1976
+ "Ġs",
1977
+ "p"
1978
+ ],
1979
+ [
1980
+ "i",
1981
+ "an"
1982
+ ],
1983
+ [
1984
+ "Ġ",
1985
+ "y"
1986
+ ],
1987
+ [
1988
+ "m",
1989
+ "ent"
1990
+ ],
1991
+ [
1992
+ "Ġ",
1993
+ "le"
1994
+ ],
1995
+ [
1996
+ "Ġn",
1997
+ "ot"
1998
+ ],
1999
+ [
2000
+ "g",
2001
+ "e"
2002
+ ],
2003
+ [
2004
+ "or",
2005
+ "d"
2006
+ ],
2007
+ [
2008
+ "r",
2009
+ "it"
2010
+ ],
2011
+ [
2012
+ "i",
2013
+ "p"
2014
+ ],
2015
+ [
2016
+ "in",
2017
+ "e"
2018
+ ],
2019
+ [
2020
+ "el",
2021
+ "l"
2022
+ ],
2023
+ [
2024
+ "al",
2025
+ "ly"
2026
+ ],
2027
+ [
2028
+ "ou",
2029
+ "r"
2030
+ ],
2031
+ [
2032
+ "o",
2033
+ "st"
2034
+ ],
2035
+ [
2036
+ "igh",
2037
+ "t"
2038
+ ],
2039
+ [
2040
+ "t",
2041
+ "her"
2042
+ ],
2043
+ [
2044
+ "a",
2045
+ "p"
2046
+ ],
2047
+ [
2048
+ "Ġ",
2049
+ "u"
2050
+ ],
2051
+ [
2052
+ "is",
2053
+ "h"
2054
+ ],
2055
+ [
2056
+ "ĠC",
2057
+ "h"
2058
+ ],
2059
+ [
2060
+ "ou",
2061
+ "n"
2062
+ ],
2063
+ [
2064
+ "i",
2065
+ "a"
2066
+ ],
2067
+ [
2068
+ "Ġ",
2069
+ "3"
2070
+ ],
2071
+ [
2072
+ "av",
2073
+ "e"
2074
+ ],
2075
+ [
2076
+ "ar",
2077
+ "y"
2078
+ ],
2079
+ [
2080
+ "u",
2081
+ "st"
2082
+ ],
2083
+ [
2084
+ "o",
2085
+ "g"
2086
+ ],
2087
+ [
2088
+ "Ġ2",
2089
+ "00"
2090
+ ],
2091
+ [
2092
+ "Ġ",
2093
+ "un"
2094
+ ],
2095
+ [
2096
+ "ou",
2097
+ "s"
2098
+ ],
2099
+ [
2100
+ "ir",
2101
+ "st"
2102
+ ],
2103
+ [
2104
+ "Ġ",
2105
+ "V"
2106
+ ],
2107
+ [
2108
+ "c",
2109
+ "c"
2110
+ ],
2111
+ [
2112
+ "Ġin",
2113
+ "c"
2114
+ ],
2115
+ [
2116
+ "Ġ",
2117
+ ";"
2118
+ ],
2119
+ [
2120
+ "Ġcom",
2121
+ "p"
2122
+ ],
2123
+ [
2124
+ "r",
2125
+ "u"
2126
+ ],
2127
+ [
2128
+ "ion",
2129
+ "s"
2130
+ ],
2131
+ [
2132
+ "Ġthe",
2133
+ "ir"
2134
+ ],
2135
+ [
2136
+ "Ġb",
2137
+ "ut"
2138
+ ],
2139
+ [
2140
+ "id",
2141
+ "e"
2142
+ ],
2143
+ [
2144
+ "u",
2145
+ "re"
2146
+ ],
2147
+ [
2148
+ "s",
2149
+ "o"
2150
+ ],
2151
+ [
2152
+ "Ġcon",
2153
+ "t"
2154
+ ],
2155
+ [
2156
+ "Ġin",
2157
+ "t"
2158
+ ],
2159
+ [
2160
+ "f",
2161
+ "ter"
2162
+ ],
2163
+ [
2164
+ "ic",
2165
+ "al"
2166
+ ],
2167
+ [
2168
+ "i",
2169
+ "al"
2170
+ ],
2171
+ [
2172
+ "Ġa",
2173
+ "r"
2174
+ ],
2175
+ [
2176
+ "Ġf",
2177
+ "irst"
2178
+ ],
2179
+ [
2180
+ "ou",
2181
+ "ld"
2182
+ ],
2183
+ [
2184
+ "Ġit",
2185
+ "s"
2186
+ ],
2187
+ [
2188
+ "he",
2189
+ "d"
2190
+ ],
2191
+ [
2192
+ "ĠâĢ",
2193
+ "ĵ"
2194
+ ],
2195
+ [
2196
+ "Ġw",
2197
+ "he"
2198
+ ],
2199
+ [
2200
+ "w",
2201
+ "o"
2202
+ ],
2203
+ [
2204
+ "ou",
2205
+ "t"
2206
+ ],
2207
+ [
2208
+ "u",
2209
+ "b"
2210
+ ],
2211
+ [
2212
+ "Ġ2",
2213
+ "0"
2214
+ ],
2215
+ [
2216
+ "f",
2217
+ "f"
2218
+ ],
2219
+ [
2220
+ "Ġ",
2221
+ ":"
2222
+ ],
2223
+ [
2224
+ "u",
2225
+ "e"
2226
+ ],
2227
+ [
2228
+ "Ġ",
2229
+ "her"
2230
+ ],
2231
+ [
2232
+ "ow",
2233
+ "n"
2234
+ ],
2235
+ [
2236
+ "o",
2237
+ "k"
2238
+ ],
2239
+ [
2240
+ "Ġal",
2241
+ "so"
2242
+ ],
2243
+ [
2244
+ "Ġc",
2245
+ "l"
2246
+ ],
2247
+ [
2248
+ "p",
2249
+ "er"
2250
+ ],
2251
+ [
2252
+ "ig",
2253
+ "n"
2254
+ ],
2255
+ [
2256
+ "at",
2257
+ "er"
2258
+ ],
2259
+ [
2260
+ "r",
2261
+ "an"
2262
+ ],
2263
+ [
2264
+ "or",
2265
+ "m"
2266
+ ],
2267
+ [
2268
+ "i",
2269
+ "e"
2270
+ ],
2271
+ [
2272
+ "om",
2273
+ "e"
2274
+ ],
2275
+ [
2276
+ "or",
2277
+ "k"
2278
+ ],
2279
+ [
2280
+ "as",
2281
+ "s"
2282
+ ],
2283
+ [
2284
+ "i",
2285
+ "re"
2286
+ ],
2287
+ [
2288
+ "e",
2289
+ "nd"
2290
+ ],
2291
+ [
2292
+ "Ġre",
2293
+ "s"
2294
+ ],
2295
+ [
2296
+ "Ġa",
2297
+ "b"
2298
+ ],
2299
+ [
2300
+ "Ġa",
2301
+ "d"
2302
+ ],
2303
+ [
2304
+ "Ġ",
2305
+ "us"
2306
+ ],
2307
+ [
2308
+ "r",
2309
+ "y"
2310
+ ],
2311
+ [
2312
+ "Ġre",
2313
+ "c"
2314
+ ],
2315
+ [
2316
+ "Ġh",
2317
+ "ave"
2318
+ ],
2319
+ [
2320
+ "ag",
2321
+ "e"
2322
+ ],
2323
+ [
2324
+ "ĠH",
2325
+ "e"
2326
+ ],
2327
+ [
2328
+ "Ġ",
2329
+ "4"
2330
+ ],
2331
+ [
2332
+ "Ġ",
2333
+ "ro"
2334
+ ],
2335
+ [
2336
+ "m",
2337
+ "er"
2338
+ ],
2339
+ [
2340
+ "Ġon",
2341
+ "e"
2342
+ ],
2343
+ [
2344
+ "on",
2345
+ "d"
2346
+ ],
2347
+ [
2348
+ "l",
2349
+ "ow"
2350
+ ],
2351
+ [
2352
+ "Ġh",
2353
+ "as"
2354
+ ],
2355
+ [
2356
+ "ĠT",
2357
+ "h"
2358
+ ],
2359
+ [
2360
+ "d",
2361
+ "u"
2362
+ ],
2363
+ [
2364
+ "Ġ",
2365
+ "5"
2366
+ ],
2367
+ [
2368
+ "Ġp",
2369
+ "er"
2370
+ ],
2371
+ [
2372
+ "Ġbe",
2373
+ "en"
2374
+ ],
2375
+ [
2376
+ "im",
2377
+ "e"
2378
+ ],
2379
+ [
2380
+ "Ġt",
2381
+ "wo"
2382
+ ],
2383
+ [
2384
+ "en",
2385
+ "ce"
2386
+ ],
2387
+ [
2388
+ "l",
2389
+ "and"
2390
+ ],
2391
+ [
2392
+ "Ġ1",
2393
+ "8"
2394
+ ],
2395
+ [
2396
+ ".",
2397
+ "@"
2398
+ ],
2399
+ [
2400
+ "Ġ@",
2401
+ ".@"
2402
+ ],
2403
+ [
2404
+ "ul",
2405
+ "t"
2406
+ ],
2407
+ [
2408
+ "re",
2409
+ "e"
2410
+ ],
2411
+ [
2412
+ "ou",
2413
+ "gh"
2414
+ ],
2415
+ [
2416
+ "i",
2417
+ "le"
2418
+ ],
2419
+ [
2420
+ "Ġwh",
2421
+ "o"
2422
+ ],
2423
+ [
2424
+ "ĠA",
2425
+ "l"
2426
+ ],
2427
+ [
2428
+ "Ġs",
2429
+ "c"
2430
+ ],
2431
+ [
2432
+ "ur",
2433
+ "ing"
2434
+ ],
2435
+ [
2436
+ "p",
2437
+ "l"
2438
+ ],
2439
+ [
2440
+ "or",
2441
+ "y"
2442
+ ],
2443
+ [
2444
+ "it",
2445
+ "ion"
2446
+ ],
2447
+ [
2448
+ "r",
2449
+ "ic"
2450
+ ],
2451
+ [
2452
+ "ation",
2453
+ "s"
2454
+ ],
2455
+ [
2456
+ "Ġd",
2457
+ "is"
2458
+ ],
2459
+ [
2460
+ "Ġth",
2461
+ "is"
2462
+ ],
2463
+ [
2464
+ "Ġb",
2465
+ "ec"
2466
+ ],
2467
+ [
2468
+ "Ġa",
2469
+ "pp"
2470
+ ],
2471
+ [
2472
+ "i",
2473
+ "z"
2474
+ ],
2475
+ [
2476
+ "ĠI",
2477
+ "t"
2478
+ ],
2479
+ [
2480
+ "a",
2481
+ "re"
2482
+ ],
2483
+ [
2484
+ "ac",
2485
+ "h"
2486
+ ],
2487
+ [
2488
+ "l",
2489
+ "ud"
2490
+ ],
2491
+ [
2492
+ "ad",
2493
+ "e"
2494
+ ],
2495
+ [
2496
+ "Ġpl",
2497
+ "ay"
2498
+ ],
2499
+ [
2500
+ "Ġ",
2501
+ "j"
2502
+ ],
2503
+ [
2504
+ "Ġm",
2505
+ "an"
2506
+ ],
2507
+ [
2508
+ "ac",
2509
+ "t"
2510
+ ],
2511
+ [
2512
+ "el",
2513
+ "y"
2514
+ ],
2515
+ [
2516
+ "Ġp",
2517
+ "art"
2518
+ ],
2519
+ [
2520
+ "Ġd",
2521
+ "es"
2522
+ ],
2523
+ [
2524
+ "Ġa",
2525
+ "g"
2526
+ ],
2527
+ [
2528
+ "Ġthe",
2529
+ "y"
2530
+ ],
2531
+ [
2532
+ "Ġy",
2533
+ "ear"
2534
+ ],
2535
+ [
2536
+ "oun",
2537
+ "t"
2538
+ ],
2539
+ [
2540
+ "Ġ20",
2541
+ "1"
2542
+ ],
2543
+ [
2544
+ "Ġo",
2545
+ "ver"
2546
+ ],
2547
+ [
2548
+ "Ġo",
2549
+ "ther"
2550
+ ],
2551
+ [
2552
+ "ou",
2553
+ "nd"
2554
+ ],
2555
+ [
2556
+ "Ġa",
2557
+ "fter"
2558
+ ],
2559
+ [
2560
+ "i",
2561
+ "b"
2562
+ ],
2563
+ [
2564
+ "o",
2565
+ "ver"
2566
+ ],
2567
+ [
2568
+ "Ġs",
2569
+ "er"
2570
+ ],
2571
+ [
2572
+ "Ġ",
2573
+ "en"
2574
+ ],
2575
+ [
2576
+ "Ġof",
2577
+ "f"
2578
+ ],
2579
+ [
2580
+ "Ġ",
2581
+ "im"
2582
+ ],
2583
+ [
2584
+ "ct",
2585
+ "ion"
2586
+ ],
2587
+ [
2588
+ "Ġ",
2589
+ "Y"
2590
+ ],
2591
+ [
2592
+ "k",
2593
+ "e"
2594
+ ],
2595
+ [
2596
+ "it",
2597
+ "e"
2598
+ ],
2599
+ [
2600
+ ",",
2601
+ "@"
2602
+ ],
2603
+ [
2604
+ "Ġ@",
2605
+ ",@"
2606
+ ],
2607
+ [
2608
+ "t",
2609
+ "e"
2610
+ ],
2611
+ [
2612
+ "ur",
2613
+ "n"
2614
+ ],
2615
+ [
2616
+ "Ġinc",
2617
+ "lud"
2618
+ ],
2619
+ [
2620
+ "res",
2621
+ "s"
2622
+ ],
2623
+ [
2624
+ "an",
2625
+ "ce"
2626
+ ],
2627
+ [
2628
+ "an",
2629
+ "g"
2630
+ ],
2631
+ [
2632
+ "Ġat",
2633
+ "t"
2634
+ ],
2635
+ [
2636
+ "ic",
2637
+ "e"
2638
+ ],
2639
+ [
2640
+ "ac",
2641
+ "e"
2642
+ ],
2643
+ [
2644
+ "ar",
2645
+ "k"
2646
+ ],
2647
+ [
2648
+ "Ġo",
2649
+ "ut"
2650
+ ],
2651
+ [
2652
+ "w",
2653
+ "n"
2654
+ ],
2655
+ [
2656
+ "p",
2657
+ "h"
2658
+ ],
2659
+ [
2660
+ "em",
2661
+ "ber"
2662
+ ],
2663
+ [
2664
+ "Ġp",
2665
+ "re"
2666
+ ],
2667
+ [
2668
+ "Ġu",
2669
+ "p"
2670
+ ],
2671
+ [
2672
+ "en",
2673
+ "s"
2674
+ ],
2675
+ [
2676
+ "m",
2677
+ "an"
2678
+ ],
2679
+ [
2680
+ "Ġe",
2681
+ "v"
2682
+ ],
2683
+ [
2684
+ "Ġt",
2685
+ "ime"
2686
+ ],
2687
+ [
2688
+ "nd",
2689
+ "er"
2690
+ ],
2691
+ [
2692
+ "rou",
2693
+ "gh"
2694
+ ],
2695
+ [
2696
+ "c",
2697
+ "ed"
2698
+ ],
2699
+ [
2700
+ "Ġf",
2701
+ "in"
2702
+ ],
2703
+ [
2704
+ "Ġint",
2705
+ "o"
2706
+ ],
2707
+ [
2708
+ "on",
2709
+ "e"
2710
+ ],
2711
+ [
2712
+ "p",
2713
+ "ort"
2714
+ ],
2715
+ [
2716
+ "rou",
2717
+ "nd"
2718
+ ],
2719
+ [
2720
+ "w",
2721
+ "e"
2722
+ ],
2723
+ [
2724
+ "re",
2725
+ "n"
2726
+ ],
2727
+ [
2728
+ "l",
2729
+ "es"
2730
+ ],
2731
+ [
2732
+ "in",
2733
+ "t"
2734
+ ],
2735
+ [
2736
+ "ĠO",
2737
+ "n"
2738
+ ],
2739
+ [
2740
+ "v",
2741
+ "el"
2742
+ ],
2743
+ [
2744
+ "Ġcom",
2745
+ "m"
2746
+ ],
2747
+ [
2748
+ "Ġs",
2749
+ "he"
2750
+ ],
2751
+ [
2752
+ "as",
2753
+ "on"
2754
+ ],
2755
+ [
2756
+ "am",
2757
+ "p"
2758
+ ],
2759
+ [
2760
+ "Ġt",
2761
+ "e"
2762
+ ],
2763
+ [
2764
+ "Ġw",
2765
+ "ould"
2766
+ ],
2767
+ [
2768
+ "w",
2769
+ "ard"
2770
+ ],
2771
+ [
2772
+ "Ġm",
2773
+ "ore"
2774
+ ],
2775
+ [
2776
+ "Ġ",
2777
+ "6"
2778
+ ],
2779
+ [
2780
+ "i",
2781
+ "ed"
2782
+ ],
2783
+ [
2784
+ "os",
2785
+ "e"
2786
+ ],
2787
+ [
2788
+ "ri",
2789
+ "b"
2790
+ ],
2791
+ [
2792
+ "ĠU",
2793
+ "n"
2794
+ ],
2795
+ [
2796
+ "Ġal",
2797
+ "l"
2798
+ ],
2799
+ [
2800
+ "ing",
2801
+ "s"
2802
+ ],
2803
+ [
2804
+ "ter",
2805
+ "n"
2806
+ ],
2807
+ [
2808
+ "c",
2809
+ "es"
2810
+ ],
2811
+ [
2812
+ "ab",
2813
+ "le"
2814
+ ],
2815
+ [
2816
+ "Ġw",
2817
+ "e"
2818
+ ],
2819
+ [
2820
+ "it",
2821
+ "ed"
2822
+ ],
2823
+ [
2824
+ "e",
2825
+ "ver"
2826
+ ],
2827
+ [
2828
+ "ent",
2829
+ "s"
2830
+ ],
2831
+ [
2832
+ "Ġh",
2833
+ "im"
2834
+ ],
2835
+ [
2836
+ "as",
2837
+ "ed"
2838
+ ],
2839
+ [
2840
+ "or",
2841
+ "s"
2842
+ ],
2843
+ [
2844
+ "o",
2845
+ "y"
2846
+ ],
2847
+ [
2848
+ "o",
2849
+ "od"
2850
+ ],
2851
+ [
2852
+ "Ġc",
2853
+ "ent"
2854
+ ],
2855
+ [
2856
+ "i",
2857
+ "x"
2858
+ ],
2859
+ [
2860
+ "as",
2861
+ "e"
2862
+ ],
2863
+ [
2864
+ "il",
2865
+ "d"
2866
+ ],
2867
+ [
2868
+ "ĠA",
2869
+ "n"
2870
+ ],
2871
+ [
2872
+ "Ġ",
2873
+ "7"
2874
+ ],
2875
+ [
2876
+ "Ġw",
2877
+ "ork"
2878
+ ],
2879
+ [
2880
+ "at",
2881
+ "es"
2882
+ ],
2883
+ [
2884
+ "i",
2885
+ "ous"
2886
+ ],
2887
+ [
2888
+ "at",
2889
+ "h"
2890
+ ],
2891
+ [
2892
+ "Ġp",
2893
+ "o"
2894
+ ],
2895
+ [
2896
+ "ro",
2897
+ "p"
2898
+ ],
2899
+ [
2900
+ "ol",
2901
+ "d"
2902
+ ],
2903
+ [
2904
+ "al",
2905
+ "s"
2906
+ ],
2907
+ [
2908
+ "is",
2909
+ "s"
2910
+ ],
2911
+ [
2912
+ "e",
2913
+ "y"
2914
+ ],
2915
+ [
2916
+ "ic",
2917
+ "t"
2918
+ ],
2919
+ [
2920
+ "Ġf",
2921
+ "e"
2922
+ ],
2923
+ [
2924
+ "Ġthe",
2925
+ "m"
2926
+ ],
2927
+ [
2928
+ "g",
2929
+ "an"
2930
+ ],
2931
+ [
2932
+ "Ġs",
2933
+ "ec"
2934
+ ],
2935
+ [
2936
+ "Ġb",
2937
+ "et"
2938
+ ],
2939
+ [
2940
+ "Ġwhe",
2941
+ "n"
2942
+ ],
2943
+ [
2944
+ "Ġs",
2945
+ "ong"
2946
+ ],
2947
+ [
2948
+ "Ġre",
2949
+ "m"
2950
+ ],
2951
+ [
2952
+ "e",
2953
+ "p"
2954
+ ],
2955
+ [
2956
+ "f",
2957
+ "orm"
2958
+ ],
2959
+ [
2960
+ "a",
2961
+ "il"
2962
+ ],
2963
+ [
2964
+ "f",
2965
+ "er"
2966
+ ],
2967
+ [
2968
+ "Ġe",
2969
+ "ar"
2970
+ ],
2971
+ [
2972
+ "ub",
2973
+ "l"
2974
+ ],
2975
+ [
2976
+ "a",
2977
+ "w"
2978
+ ],
2979
+ [
2980
+ "Ġk",
2981
+ "n"
2982
+ ],
2983
+ [
2984
+ "ak",
2985
+ "e"
2986
+ ],
2987
+ [
2988
+ "a",
2989
+ "us"
2990
+ ],
2991
+ [
2992
+ "Ġm",
2993
+ "ost"
2994
+ ],
2995
+ [
2996
+ "Ġcon",
2997
+ "s"
2998
+ ],
2999
+ [
3000
+ "Ġd",
3001
+ "uring"
3002
+ ],
3003
+ [
3004
+ "ĠA",
3005
+ "s"
3006
+ ],
3007
+ [
3008
+ "or",
3009
+ "th"
3010
+ ],
3011
+ [
3012
+ "Ġn",
3013
+ "ew"
3014
+ ],
3015
+ [
3016
+ "er",
3017
+ "ed"
3018
+ ],
3019
+ [
3020
+ "il",
3021
+ "m"
3022
+ ],
3023
+ [
3024
+ "v",
3025
+ "ed"
3026
+ ],
3027
+ [
3028
+ "at",
3029
+ "t"
3030
+ ],
3031
+ [
3032
+ "Ġon",
3033
+ "ly"
3034
+ ],
3035
+ [
3036
+ "Ġ",
3037
+ "9"
3038
+ ],
3039
+ [
3040
+ "Ġd",
3041
+ "ec"
3042
+ ],
3043
+ [
3044
+ "Ġ",
3045
+ "8"
3046
+ ],
3047
+ [
3048
+ "ic",
3049
+ "k"
3050
+ ],
3051
+ [
3052
+ "Ġg",
3053
+ "ame"
3054
+ ],
3055
+ [
3056
+ "on",
3057
+ "s"
3058
+ ],
3059
+ [
3060
+ "u",
3061
+ "g"
3062
+ ],
3063
+ [
3064
+ "Ġt",
3065
+ "r"
3066
+ ],
3067
+ [
3068
+ "f",
3069
+ "t"
3070
+ ],
3071
+ [
3072
+ "ot",
3073
+ "h"
3074
+ ],
3075
+ [
3076
+ "o",
3077
+ "ok"
3078
+ ],
3079
+ [
3080
+ "ĠM",
3081
+ "ar"
3082
+ ],
3083
+ [
3084
+ "re",
3085
+ "at"
3086
+ ],
3087
+ [
3088
+ "w",
3089
+ "ay"
3090
+ ],
3091
+ [
3092
+ "Ġc",
3093
+ "an"
3094
+ ],
3095
+ [
3096
+ "ol",
3097
+ "low"
3098
+ ],
3099
+ [
3100
+ "ou",
3101
+ "th"
3102
+ ],
3103
+ [
3104
+ "we",
3105
+ "en"
3106
+ ],
3107
+ [
3108
+ "ĠE",
3109
+ "n"
3110
+ ],
3111
+ [
3112
+ "Ġ19",
3113
+ "9"
3114
+ ],
3115
+ [
3116
+ "ter",
3117
+ "s"
3118
+ ],
3119
+ [
3120
+ "Ġre",
3121
+ "l"
3122
+ ],
3123
+ [
3124
+ "in",
3125
+ "d"
3126
+ ],
3127
+ [
3128
+ "Ġab",
3129
+ "out"
3130
+ ],
3131
+ [
3132
+ "Ġse",
3133
+ "ason"
3134
+ ],
3135
+ [
3136
+ "Ġag",
3137
+ "ain"
3138
+ ],
3139
+ [
3140
+ "r",
3141
+ "al"
3142
+ ],
3143
+ [
3144
+ "Ġth",
3145
+ "ree"
3146
+ ],
3147
+ [
3148
+ "ation",
3149
+ "al"
3150
+ ],
3151
+ [
3152
+ "Ġu",
3153
+ "nder"
3154
+ ],
3155
+ [
3156
+ "ul",
3157
+ "ar"
3158
+ ],
3159
+ [
3160
+ "Ġm",
3161
+ "e"
3162
+ ],
3163
+ [
3164
+ "Ġth",
3165
+ "an"
3166
+ ],
3167
+ [
3168
+ "ĠC",
3169
+ "om"
3170
+ ],
3171
+ [
3172
+ "ĠA",
3173
+ "r"
3174
+ ],
3175
+ [
3176
+ "h",
3177
+ "ip"
3178
+ ],
3179
+ [
3180
+ "o",
3181
+ "b"
3182
+ ],
3183
+ [
3184
+ "Ġn",
3185
+ "e"
3186
+ ],
3187
+ [
3188
+ "Ġbet",
3189
+ "ween"
3190
+ ],
3191
+ [
3192
+ "Ġf",
3193
+ "l"
3194
+ ],
3195
+ [
3196
+ "h",
3197
+ "n"
3198
+ ],
3199
+ [
3200
+ "v",
3201
+ "e"
3202
+ ],
3203
+ [
3204
+ "Ġch",
3205
+ "ar"
3206
+ ],
3207
+ [
3208
+ "Ġc",
3209
+ "ol"
3210
+ ],
3211
+ [
3212
+ "Ġrec",
3213
+ "ord"
3214
+ ],
3215
+ [
3216
+ "i",
3217
+ "ew"
3218
+ ],
3219
+ [
3220
+ "r",
3221
+ "on"
3222
+ ],
3223
+ [
3224
+ "f",
3225
+ "ore"
3226
+ ],
3227
+ [
3228
+ "Ġth",
3229
+ "rough"
3230
+ ],
3231
+ [
3232
+ "is",
3233
+ "ion"
3234
+ ],
3235
+ [
3236
+ "or",
3237
+ "n"
3238
+ ],
3239
+ [
3240
+ "Ġ",
3241
+ "00"
3242
+ ],
3243
+ [
3244
+ "oc",
3245
+ "k"
3246
+ ],
3247
+ [
3248
+ "Ġ",
3249
+ "ver"
3250
+ ],
3251
+ [
3252
+ "Ġl",
3253
+ "ater"
3254
+ ],
3255
+ [
3256
+ "Ġn",
3257
+ "um"
3258
+ ],
3259
+ [
3260
+ "Ġe",
3261
+ "nd"
3262
+ ],
3263
+ [
3264
+ "ol",
3265
+ "og"
3266
+ ],
3267
+ [
3268
+ "am",
3269
+ "es"
3270
+ ],
3271
+ [
3272
+ "Ġp",
3273
+ "os"
3274
+ ],
3275
+ [
3276
+ "Ġw",
3277
+ "rit"
3278
+ ],
3279
+ [
3280
+ "Ġpro",
3281
+ "du"
3282
+ ],
3283
+ [
3284
+ "Ġwh",
3285
+ "ile"
3286
+ ],
3287
+ [
3288
+ "Ġa",
3289
+ "ct"
3290
+ ],
3291
+ [
3292
+ "Ġre",
3293
+ "le"
3294
+ ],
3295
+ [
3296
+ "Ġf",
3297
+ "ilm"
3298
+ ],
3299
+ [
3300
+ "is",
3301
+ "hed"
3302
+ ],
3303
+ [
3304
+ "Ġp",
3305
+ "r"
3306
+ ],
3307
+ [
3308
+ "an",
3309
+ "s"
3310
+ ],
3311
+ [
3312
+ "Ġre",
3313
+ "g"
3314
+ ],
3315
+ [
3316
+ "Ġfor",
3317
+ "m"
3318
+ ],
3319
+ [
3320
+ "Ġas",
3321
+ "s"
3322
+ ],
3323
+ [
3324
+ "ĠS",
3325
+ "e"
3326
+ ],
3327
+ [
3328
+ "ur",
3329
+ "y"
3330
+ ],
3331
+ [
3332
+ "t",
3333
+ "ed"
3334
+ ],
3335
+ [
3336
+ "t",
3337
+ "s"
3338
+ ],
3339
+ [
3340
+ "Ġm",
3341
+ "ade"
3342
+ ],
3343
+ [
3344
+ "Ġsu",
3345
+ "b"
3346
+ ],
3347
+ [
3348
+ "Ġp",
3349
+ "e"
3350
+ ],
3351
+ [
3352
+ "Ġs",
3353
+ "o"
3354
+ ],
3355
+ [
3356
+ "or",
3357
+ "ld"
3358
+ ],
3359
+ [
3360
+ "Ġre",
3361
+ "t"
3362
+ ],
3363
+ [
3364
+ "ĠN",
3365
+ "ew"
3366
+ ],
3367
+ [
3368
+ "Ġsp",
3369
+ "ec"
3370
+ ],
3371
+ [
3372
+ "Ġa",
3373
+ "cc"
3374
+ ],
3375
+ [
3376
+ "Ġ",
3377
+ "qu"
3378
+ ],
3379
+ [
3380
+ "Ġwhe",
3381
+ "re"
3382
+ ],
3383
+ [
3384
+ "en",
3385
+ "er"
3386
+ ],
3387
+ [
3388
+ "Ġm",
3389
+ "ov"
3390
+ ],
3391
+ [
3392
+ "he",
3393
+ "s"
3394
+ ],
3395
+ [
3396
+ "mer",
3397
+ "ic"
3398
+ ],
3399
+ [
3400
+ "at",
3401
+ "ing"
3402
+ ],
3403
+ [
3404
+ "Ġin",
3405
+ "ter"
3406
+ ],
3407
+ [
3408
+ "ĠL",
3409
+ "e"
3410
+ ],
3411
+ [
3412
+ "ĠA",
3413
+ "meric"
3414
+ ],
3415
+ [
3416
+ "Ġ",
3417
+ "ra"
3418
+ ],
3419
+ [
3420
+ "Ġs",
3421
+ "ome"
3422
+ ],
3423
+ [
3424
+ "Ġc",
3425
+ "o"
3426
+ ],
3427
+ [
3428
+ "Ġl",
3429
+ "ar"
3430
+ ],
3431
+ [
3432
+ "Ġb",
3433
+ "u"
3434
+ ],
3435
+ [
3436
+ "Ġde",
3437
+ "f"
3438
+ ],
3439
+ [
3440
+ "b",
3441
+ "um"
3442
+ ],
3443
+ [
3444
+ "Ġa",
3445
+ "c"
3446
+ ],
3447
+ [
3448
+ "Ġm",
3449
+ "us"
3450
+ ],
3451
+ [
3452
+ "Ġf",
3453
+ "ollow"
3454
+ ],
3455
+ [
3456
+ "ĠA",
3457
+ "t"
3458
+ ],
3459
+ [
3460
+ "in",
3461
+ "s"
3462
+ ],
3463
+ [
3464
+ "iv",
3465
+ "ed"
3466
+ ],
3467
+ [
3468
+ "if",
3469
+ "ic"
3470
+ ],
3471
+ [
3472
+ "u",
3473
+ "al"
3474
+ ],
3475
+ [
3476
+ "Ġa",
3477
+ "m"
3478
+ ],
3479
+ [
3480
+ "Ġsu",
3481
+ "ch"
3482
+ ],
3483
+ [
3484
+ "Ġsec",
3485
+ "ond"
3486
+ ],
3487
+ [
3488
+ "i",
3489
+ "ke"
3490
+ ],
3491
+ [
3492
+ "Ġf",
3493
+ "our"
3494
+ ],
3495
+ [
3496
+ "Ġin",
3497
+ "d"
3498
+ ],
3499
+ [
3500
+ "an",
3501
+ "n"
3502
+ ],
3503
+ [
3504
+ "he",
3505
+ "n"
3506
+ ],
3507
+ [
3508
+ "Ġus",
3509
+ "ed"
3510
+ ],
3511
+ [
3512
+ "ĠR",
3513
+ "e"
3514
+ ],
3515
+ [
3516
+ "ic",
3517
+ "s"
3518
+ ],
3519
+ [
3520
+ "le",
3521
+ "ct"
3522
+ ],
3523
+ [
3524
+ "Ġd",
3525
+ "ay"
3526
+ ],
3527
+ [
3528
+ "i",
3529
+ "el"
3530
+ ],
3531
+ [
3532
+ "il",
3533
+ "y"
3534
+ ],
3535
+ [
3536
+ "ĠTh",
3537
+ "is"
3538
+ ],
3539
+ [
3540
+ "Ġ",
3541
+ "0"
3542
+ ],
3543
+ [
3544
+ "Ġp",
3545
+ "ubl"
3546
+ ],
3547
+ [
3548
+ "Ġc",
3549
+ "all"
3550
+ ],
3551
+ [
3552
+ "ĠJ",
3553
+ "o"
3554
+ ],
3555
+ [
3556
+ "l",
3557
+ "l"
3558
+ ],
3559
+ [
3560
+ "Ġal",
3561
+ "bum"
3562
+ ],
3563
+ [
3564
+ "Ġ00",
3565
+ "0"
3566
+ ],
3567
+ [
3568
+ "ran",
3569
+ "s"
3570
+ ],
3571
+ [
3572
+ "Ġd",
3573
+ "o"
3574
+ ],
3575
+ [
3576
+ "an",
3577
+ "y"
3578
+ ],
3579
+ [
3580
+ "Ġbe",
3581
+ "fore"
3582
+ ],
3583
+ [
3584
+ "ro",
3585
+ "s"
3586
+ ],
3587
+ [
3588
+ "ĠS",
3589
+ "h"
3590
+ ],
3591
+ [
3592
+ "Ġs",
3593
+ "y"
3594
+ ],
3595
+ [
3596
+ "a",
3597
+ "id"
3598
+ ],
3599
+ [
3600
+ "ĠEn",
3601
+ "g"
3602
+ ],
3603
+ [
3604
+ "Ġbe",
3605
+ "ing"
3606
+ ],
3607
+ [
3608
+ "Ġ1",
3609
+ "0"
3610
+ ],
3611
+ [
3612
+ "u",
3613
+ "c"
3614
+ ],
3615
+ [
3616
+ "Ġe",
3617
+ "p"
3618
+ ],
3619
+ [
3620
+ "Ġsu",
3621
+ "pp"
3622
+ ],
3623
+ [
3624
+ "Ġthe",
3625
+ "re"
3626
+ ],
3627
+ [
3628
+ "Ġyear",
3629
+ "s"
3630
+ ],
3631
+ [
3632
+ "ar",
3633
+ "s"
3634
+ ],
3635
+ [
3636
+ "ow",
3637
+ "ever"
3638
+ ],
3639
+ [
3640
+ "Ġ",
3641
+ "ent"
3642
+ ],
3643
+ [
3644
+ "if",
3645
+ "e"
3646
+ ],
3647
+ [
3648
+ "Ġh",
3649
+ "igh"
3650
+ ],
3651
+ [
3652
+ "Ġf",
3653
+ "ound"
3654
+ ],
3655
+ [
3656
+ "ir",
3657
+ "d"
3658
+ ],
3659
+ [
3660
+ "Ġn",
3661
+ "o"
3662
+ ],
3663
+ [
3664
+ "Ġs",
3665
+ "et"
3666
+ ],
3667
+ [
3668
+ "in",
3669
+ "es"
3670
+ ],
3671
+ [
3672
+ "iv",
3673
+ "er"
3674
+ ],
3675
+ [
3676
+ "i",
3677
+ "o"
3678
+ ],
3679
+ [
3680
+ "ot",
3681
+ "her"
3682
+ ],
3683
+ [
3684
+ "j",
3685
+ "ect"
3686
+ ],
3687
+ [
3688
+ "Ġs",
3689
+ "ur"
3690
+ ],
3691
+ [
3692
+ "a",
3693
+ "j"
3694
+ ],
3695
+ [
3696
+ "t",
3697
+ "en"
3698
+ ],
3699
+ [
3700
+ "Ġt",
3701
+ "ra"
3702
+ ],
3703
+ [
3704
+ "Ġ1",
3705
+ "2"
3706
+ ],
3707
+ [
3708
+ "is",
3709
+ "ed"
3710
+ ],
3711
+ [
3712
+ "it",
3713
+ "ies"
3714
+ ],
3715
+ [
3716
+ "vel",
3717
+ "op"
3718
+ ],
3719
+ [
3720
+ "Ġb",
3721
+ "l"
3722
+ ],
3723
+ [
3724
+ "al",
3725
+ "e"
3726
+ ],
3727
+ [
3728
+ "Ġser",
3729
+ "ies"
3730
+ ],
3731
+ [
3732
+ "Ġl",
3733
+ "oc"
3734
+ ],
3735
+ [
3736
+ "Ġnum",
3737
+ "ber"
3738
+ ],
3739
+ [
3740
+ "Ġp",
3741
+ "res"
3742
+ ],
3743
+ [
3744
+ "an",
3745
+ "e"
3746
+ ],
3747
+ [
3748
+ "aus",
3749
+ "e"
3750
+ ],
3751
+ [
3752
+ "od",
3753
+ "e"
3754
+ ],
3755
+ [
3756
+ "e",
3757
+ "k"
3758
+ ],
3759
+ [
3760
+ "t",
3761
+ "on"
3762
+ ],
3763
+ [
3764
+ "ĠS",
3765
+ "c"
3766
+ ],
3767
+ [
3768
+ "i",
3769
+ "er"
3770
+ ],
3771
+ [
3772
+ "is",
3773
+ "e"
3774
+ ],
3775
+ [
3776
+ "Ġse",
3777
+ "ver"
3778
+ ],
3779
+ [
3780
+ "in",
3781
+ "ce"
3782
+ ],
3783
+ [
3784
+ "Ġb",
3785
+ "oth"
3786
+ ],
3787
+ [
3788
+ "an",
3789
+ "k"
3790
+ ],
3791
+ [
3792
+ "ro",
3793
+ "w"
3794
+ ],
3795
+ [
3796
+ "ire",
3797
+ "ct"
3798
+ ],
3799
+ [
3800
+ "s",
3801
+ "on"
3802
+ ],
3803
+ [
3804
+ "Ġthe",
3805
+ "n"
3806
+ ],
3807
+ [
3808
+ "ĠB",
3809
+ "rit"
3810
+ ],
3811
+ [
3812
+ "i",
3813
+ "et"
3814
+ ],
3815
+ [
3816
+ "Ġ1",
3817
+ "6"
3818
+ ],
3819
+ [
3820
+ "Ġep",
3821
+ "is"
3822
+ ],
3823
+ [
3824
+ "Ġinclud",
3825
+ "ing"
3826
+ ],
3827
+ [
3828
+ "it",
3829
+ "s"
3830
+ ],
3831
+ [
3832
+ "ig",
3833
+ "in"
3834
+ ],
3835
+ [
3836
+ "p",
3837
+ "r"
3838
+ ],
3839
+ [
3840
+ "Ġ",
3841
+ "/"
3842
+ ],
3843
+ [
3844
+ "Ġagain",
3845
+ "st"
3846
+ ],
3847
+ [
3848
+ "Ġw",
3849
+ "ell"
3850
+ ],
3851
+ [
3852
+ "Ġbec",
3853
+ "ame"
3854
+ ],
3855
+ [
3856
+ "Ġex",
3857
+ "p"
3858
+ ],
3859
+ [
3860
+ "Ġkn",
3861
+ "own"
3862
+ ],
3863
+ [
3864
+ "Ġt",
3865
+ "rans"
3866
+ ],
3867
+ [
3868
+ "Ġchar",
3869
+ "ac"
3870
+ ],
3871
+ [
3872
+ "ĠâĢ",
3873
+ "Ķ"
3874
+ ],
3875
+ [
3876
+ "r",
3877
+ "am"
3878
+ ],
3879
+ [
3880
+ "Ġb",
3881
+ "ack"
3882
+ ],
3883
+ [
3884
+ "Ġad",
3885
+ "d"
3886
+ ],
3887
+ [
3888
+ "Ġp",
3889
+ "op"
3890
+ ],
3891
+ [
3892
+ "Ġg",
3893
+ "o"
3894
+ ],
3895
+ [
3896
+ "ur",
3897
+ "ch"
3898
+ ],
3899
+ [
3900
+ "Ġdes",
3901
+ "c"
3902
+ ],
3903
+ [
3904
+ "Ġs",
3905
+ "ing"
3906
+ ],
3907
+ [
3908
+ "iel",
3909
+ "d"
3910
+ ],
3911
+ [
3912
+ "Ġper",
3913
+ "form"
3914
+ ],
3915
+ [
3916
+ "ain",
3917
+ "ed"
3918
+ ],
3919
+ [
3920
+ "Ġre",
3921
+ "ce"
3922
+ ],
3923
+ [
3924
+ "id",
3925
+ "ent"
3926
+ ],
3927
+ [
3928
+ "Ġe",
3929
+ "m"
3930
+ ],
3931
+ [
3932
+ "er",
3933
+ "t"
3934
+ ],
3935
+ [
3936
+ "u",
3937
+ "res"
3938
+ ],
3939
+ [
3940
+ "Ġin",
3941
+ "v"
3942
+ ],
3943
+ [
3944
+ "Ġde",
3945
+ "p"
3946
+ ],
3947
+ [
3948
+ "Ġ19",
3949
+ "8"
3950
+ ],
3951
+ [
3952
+ "a",
3953
+ "ir"
3954
+ ],
3955
+ [
3956
+ "er",
3957
+ "n"
3958
+ ],
3959
+ [
3960
+ "at",
3961
+ "her"
3962
+ ],
3963
+ [
3964
+ "f",
3965
+ "ul"
3966
+ ],
3967
+ [
3968
+ "Ġ",
3969
+ "Z"
3970
+ ],
3971
+ [
3972
+ "Ġm",
3973
+ "on"
3974
+ ],
3975
+ [
3976
+ "Ġman",
3977
+ "y"
3978
+ ],
3979
+ [
3980
+ "Ġm",
3981
+ "ain"
3982
+ ],
3983
+ [
3984
+ "Ġst",
3985
+ "ud"
3986
+ ],
3987
+ [
3988
+ "Ġl",
3989
+ "ong"
3990
+ ],
3991
+ [
3992
+ "in",
3993
+ "n"
3994
+ ],
3995
+ [
3996
+ "th",
3997
+ "ough"
3998
+ ],
3999
+ [
4000
+ "u",
4001
+ "p"
4002
+ ],
4003
+ [
4004
+ "o",
4005
+ "ol"
4006
+ ],
4007
+ [
4008
+ "ĠUn",
4009
+ "ited"
4010
+ ],
4011
+ [
4012
+ "l",
4013
+ "ed"
4014
+ ],
4015
+ [
4016
+ "em",
4017
+ "ent"
4018
+ ],
4019
+ [
4020
+ "Ġ1",
4021
+ "5"
4022
+ ],
4023
+ [
4024
+ "ow",
4025
+ "er"
4026
+ ],
4027
+ [
4028
+ "ĠJo",
4029
+ "hn"
4030
+ ],
4031
+ [
4032
+ "Ġo",
4033
+ "p"
4034
+ ],
4035
+ [
4036
+ "Ġ1",
4037
+ "1"
4038
+ ],
4039
+ [
4040
+ "in",
4041
+ "ed"
4042
+ ],
4043
+ [
4044
+ "Ġm",
4045
+ "et"
4046
+ ],
4047
+ [
4048
+ "o",
4049
+ "ber"
4050
+ ],
4051
+ [
4052
+ "le",
4053
+ "y"
4054
+ ],
4055
+ [
4056
+ "Ġ1",
4057
+ "7"
4058
+ ],
4059
+ [
4060
+ "Ġcent",
4061
+ "ury"
4062
+ ],
4063
+ [
4064
+ "Ġte",
4065
+ "am"
4066
+ ],
4067
+ [
4068
+ "Ġ",
4069
+ "est"
4070
+ ],
4071
+ [
4072
+ "ĠA",
4073
+ "fter"
4074
+ ],
4075
+ [
4076
+ "y",
4077
+ "l"
4078
+ ],
4079
+ [
4080
+ "Ġm",
4081
+ "in"
4082
+ ],
4083
+ [
4084
+ "u",
4085
+ "ch"
4086
+ ],
4087
+ [
4088
+ "ut",
4089
+ "e"
4090
+ ],
4091
+ [
4092
+ "Ġde",
4093
+ "velop"
4094
+ ],
4095
+ [
4096
+ "ĠS",
4097
+ "he"
4098
+ ],
4099
+ [
4100
+ "i",
4101
+ "am"
4102
+ ],
4103
+ [
4104
+ "Ġsh",
4105
+ "ow"
4106
+ ],
4107
+ [
4108
+ "el",
4109
+ "f"
4110
+ ],
4111
+ [
4112
+ "Ġre",
4113
+ "p"
4114
+ ],
4115
+ [
4116
+ "Ġcon",
4117
+ "c"
4118
+ ],
4119
+ [
4120
+ "at",
4121
+ "ive"
4122
+ ],
4123
+ [
4124
+ "Ġc",
4125
+ "re"
4126
+ ],
4127
+ [
4128
+ "over",
4129
+ "n"
4130
+ ],
4131
+ [
4132
+ "a",
4133
+ "red"
4134
+ ],
4135
+ [
4136
+ "Ġ19",
4137
+ "4"
4138
+ ],
4139
+ [
4140
+ "Ġor",
4141
+ "igin"
4142
+ ],
4143
+ [
4144
+ "Ġs",
4145
+ "m"
4146
+ ],
4147
+ [
4148
+ "iv",
4149
+ "ers"
4150
+ ],
4151
+ [
4152
+ "a",
4153
+ "z"
4154
+ ],
4155
+ [
4156
+ "Ġle",
4157
+ "ad"
4158
+ ],
4159
+ [
4160
+ "Ġsever",
4161
+ "al"
4162
+ ],
4163
+ [
4164
+ "a",
4165
+ "h"
4166
+ ],
4167
+ [
4168
+ "Ġo",
4169
+ "b"
4170
+ ],
4171
+ [
4172
+ "Ġre",
4173
+ "v"
4174
+ ],
4175
+ [
4176
+ "Ġm",
4177
+ "ill"
4178
+ ],
4179
+ [
4180
+ "er",
4181
+ "m"
4182
+ ],
4183
+ [
4184
+ "u",
4185
+ "ally"
4186
+ ],
4187
+ [
4188
+ "o",
4189
+ "ot"
4190
+ ],
4191
+ [
4192
+ "Ġbe",
4193
+ "gan"
4194
+ ]
4195
  ]
4196
  }
4197
  }
tokenizer_config.json CHANGED
@@ -1,41 +1,40 @@
1
  {
2
- "add_bos_token": true,
3
  "add_prefix_space": false,
4
- "bos_token": {
5
- "__type": "AddedToken",
6
- "content": "</s>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "eos_token": {
13
- "__type": "AddedToken",
14
- "content": "</s>",
15
- "lstrip": false,
16
- "normalized": true,
17
- "rstrip": false,
18
- "single_word": false
 
 
 
 
 
 
 
 
 
 
19
  },
 
 
 
20
  "errors": "replace",
 
21
  "model_max_length": 1000000000000000019884624838656,
22
- "pad_token": {
23
- "__type": "AddedToken",
24
- "content": "<pad>",
25
- "lstrip": false,
26
- "normalized": true,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
  "processor_class": "Blip2Processor",
31
- "special_tokens_map_file": null,
32
  "tokenizer_class": "GPT2Tokenizer",
33
- "unk_token": {
34
- "__type": "AddedToken",
35
- "content": "</s>",
36
- "lstrip": false,
37
- "normalized": true,
38
- "rstrip": false,
39
- "single_word": false
40
- }
41
  }
 
1
  {
2
+ "add_bos_token": false,
3
  "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "1024": {
22
+ "content": "<image>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
  },
30
+ "bos_token": "</s>",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
  "errors": "replace",
34
+ "extra_special_tokens": {},
35
  "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": "<pad>",
 
 
 
 
 
 
 
37
  "processor_class": "Blip2Processor",
 
38
  "tokenizer_class": "GPT2Tokenizer",
39
+ "unk_token": "</s>"
 
 
 
 
 
 
 
40
  }
vocab.json CHANGED
@@ -1,1026 +1 @@
1
- {
2
- "!": 2,
3
- "\"": 3,
4
- "#": 4,
5
- "$": 5,
6
- "%": 6,
7
- "&": 7,
8
- "'": 8,
9
- "(": 9,
10
- ")": 10,
11
- "*": 11,
12
- "+": 12,
13
- ",": 13,
14
- ",@": 625,
15
- "-": 14,
16
- "-@": 340,
17
- ".": 15,
18
- ".@": 574,
19
- "/": 16,
20
- "0": 17,
21
- "00": 396,
22
- "1": 18,
23
- "2": 19,
24
- "3": 20,
25
- "4": 21,
26
- "5": 22,
27
- "6": 23,
28
- "7": 24,
29
- "8": 25,
30
- "9": 26,
31
- ":": 27,
32
- ";": 28,
33
- "<": 29,
34
- "</s>": 1,
35
- "<pad>": 0,
36
- "=": 30,
37
- ">": 31,
38
- "?": 32,
39
- "@": 33,
40
- "A": 34,
41
- "B": 35,
42
- "C": 36,
43
- "D": 37,
44
- "E": 38,
45
- "F": 39,
46
- "G": 40,
47
- "H": 41,
48
- "I": 42,
49
- "J": 43,
50
- "K": 44,
51
- "L": 45,
52
- "M": 46,
53
- "N": 47,
54
- "O": 48,
55
- "P": 49,
56
- "Q": 50,
57
- "R": 51,
58
- "S": 52,
59
- "T": 53,
60
- "U": 54,
61
- "V": 55,
62
- "W": 56,
63
- "X": 57,
64
- "Y": 58,
65
- "Z": 59,
66
- "[": 60,
67
- "\\": 61,
68
- "]": 62,
69
- "^": 63,
70
- "_": 64,
71
- "`": 65,
72
- "a": 66,
73
- "ab": 436,
74
- "able": 678,
75
- "ac": 321,
76
- "ace": 635,
77
- "ach": 596,
78
- "ack": 458,
79
- "act": 602,
80
- "ad": 320,
81
- "ade": 598,
82
- "ag": 394,
83
- "age": 555,
84
- "ah": 1016,
85
- "aid": 874,
86
- "ail": 715,
87
- "ain": 398,
88
- "ained": 954,
89
- "air": 963,
90
- "aj": 898,
91
- "ak": 432,
92
- "ake": 721,
93
- "al": 286,
94
- "ale": 906,
95
- "all": 430,
96
- "ally": 481,
97
- "als": 701,
98
- "am": 315,
99
- "ame": 450,
100
- "ames": 792,
101
- "amp": 664,
102
- "an": 284,
103
- "ance": 631,
104
- "and": 389,
105
- "ane": 911,
106
- "ang": 632,
107
- "ank": 922,
108
- "ann": 850,
109
- "ans": 802,
110
- "ant": 437,
111
- "any": 869,
112
- "ap": 486,
113
- "ar": 281,
114
- "ard": 428,
115
- "are": 595,
116
- "ared": 1008,
117
- "ark": 636,
118
- "ars": 883,
119
- "art": 431,
120
- "ary": 494,
121
- "as": 290,
122
- "ase": 690,
123
- "ased": 684,
124
- "ason": 663,
125
- "ass": 545,
126
- "ast": 468,
127
- "at": 272,
128
- "ate": 429,
129
- "ated": 444,
130
- "ater": 539,
131
- "ates": 695,
132
- "ath": 697,
133
- "ather": 965,
134
- "ating": 825,
135
- "ation": 355,
136
- "ational": 762,
137
- "ations": 588,
138
- "ative": 1005,
139
- "att": 732,
140
- "aus": 722,
141
- "ause": 912,
142
- "av": 400,
143
- "ave": 493,
144
- "aw": 719,
145
- "ay": 352,
146
- "az": 1013,
147
- "b": 67,
148
- "ber": 440,
149
- "bum": 835,
150
- "c": 68,
151
- "cc": 502,
152
- "ce": 362,
153
- "ced": 649,
154
- "ces": 677,
155
- "ch": 373,
156
- "ct": 385,
157
- "ction": 621,
158
- "d": 69,
159
- "du": 565,
160
- "e": 70,
161
- "ear": 447,
162
- "ec": 326,
163
- "ect": 459,
164
- "ed": 268,
165
- "ek": 914,
166
- "el": 312,
167
- "elf": 1002,
168
- "ell": 480,
169
- "ely": 603,
170
- "em": 367,
171
- "ember": 640,
172
- "ement": 979,
173
- "en": 277,
174
- "ence": 571,
175
- "end": 547,
176
- "ener": 821,
177
- "ens": 643,
178
- "ent": 306,
179
- "ents": 682,
180
- "ep": 713,
181
- "er": 263,
182
- "ere": 414,
183
- "ered": 729,
184
- "erm": 1020,
185
- "ern": 964,
186
- "ers": 384,
187
- "ert": 958,
188
- "es": 280,
189
- "ess": 446,
190
- "est": 392,
191
- "et": 333,
192
- "ever": 681,
193
- "ew": 425,
194
- "ey": 703,
195
- "f": 71,
196
- "fer": 716,
197
- "ff": 529,
198
- "fore": 781,
199
- "form": 714,
200
- "ft": 742,
201
- "fter": 515,
202
- "ful": 966,
203
- "g": 72,
204
- "gan": 707,
205
- "ge": 475,
206
- "gh": 442,
207
- "h": 73,
208
- "he": 259,
209
- "hed": 522,
210
- "hen": 851,
211
- "her": 358,
212
- "hes": 823,
213
- "hip": 769,
214
- "hn": 774,
215
- "i": 74,
216
- "ia": 491,
217
- "ial": 517,
218
- "iam": 1000,
219
- "ian": 470,
220
- "ib": 615,
221
- "ic": 293,
222
- "ical": 516,
223
- "ice": 634,
224
- "ich": 433,
225
- "ick": 737,
226
- "ics": 854,
227
- "ict": 704,
228
- "id": 335,
229
- "ide": 510,
230
- "ident": 956,
231
- "ie": 542,
232
- "ied": 670,
233
- "iel": 857,
234
- "ield": 952,
235
- "ier": 917,
236
- "ies": 410,
237
- "iet": 928,
238
- "iew": 779,
239
- "if": 411,
240
- "ife": 886,
241
- "ific": 842,
242
- "ig": 342,
243
- "igh": 427,
244
- "ight": 484,
245
- "igin": 933,
246
- "ign": 538,
247
- "ike": 847,
248
- "il": 307,
249
- "ild": 691,
250
- "ile": 579,
251
- "ill": 415,
252
- "ilm": 730,
253
- "ily": 858,
254
- "im": 337,
255
- "ime": 569,
256
- "in": 261,
257
- "ince": 920,
258
- "ind": 756,
259
- "ine": 479,
260
- "ined": 985,
261
- "ines": 892,
262
- "ing": 287,
263
- "ings": 675,
264
- "inn": 973,
265
- "ins": 840,
266
- "int": 658,
267
- "io": 894,
268
- "ion": 297,
269
- "ions": 507,
270
- "ious": 696,
271
- "ip": 478,
272
- "ir": 339,
273
- "ird": 889,
274
- "ire": 546,
275
- "irect": 924,
276
- "irst": 500,
277
- "is": 279,
278
- "ise": 918,
279
- "ised": 902,
280
- "ish": 488,
281
- "ished": 800,
282
- "ision": 783,
283
- "iss": 702,
284
- "ist": 397,
285
- "it": 275,
286
- "ite": 624,
287
- "ited": 680,
288
- "ith": 351,
289
- "ities": 903,
290
- "ition": 586,
291
- "its": 932,
292
- "ity": 443,
293
- "iv": 336,
294
- "ive": 452,
295
- "ived": 841,
296
- "iver": 893,
297
- "ivers": 1012,
298
- "ix": 689,
299
- "iz": 593,
300
- "j": 75,
301
- "ject": 896,
302
- "k": 76,
303
- "ke": 623,
304
- "l": 77,
305
- "land": 572,
306
- "ld": 416,
307
- "le": 298,
308
- "lect": 855,
309
- "led": 978,
310
- "les": 657,
311
- "ley": 988,
312
- "ll": 864,
313
- "low": 562,
314
- "lud": 597,
315
- "ly": 328,
316
- "m": 78,
317
- "man": 644,
318
- "ment": 472,
319
- "mer": 559,
320
- "meric": 824,
321
- "n": 79,
322
- "nd": 271,
323
- "nder": 647,
324
- "o": 80,
325
- "ob": 770,
326
- "ober": 987,
327
- "oc": 439,
328
- "ock": 786,
329
- "od": 399,
330
- "ode": 913,
331
- "og": 496,
332
- "ok": 534,
333
- "ol": 334,
334
- "old": 700,
335
- "ollow": 749,
336
- "olog": 791,
337
- "om": 313,
338
- "ome": 543,
339
- "on": 264,
340
- "ond": 561,
341
- "one": 652,
342
- "ong": 426,
343
- "ons": 739,
344
- "ood": 687,
345
- "ook": 744,
346
- "ool": 976,
347
- "oot": 1022,
348
- "op": 407,
349
- "or": 274,
350
- "ord": 476,
351
- "ore": 420,
352
- "ork": 544,
353
- "orld": 814,
354
- "orm": 541,
355
- "orn": 784,
356
- "ors": 685,
357
- "ort": 413,
358
- "orth": 727,
359
- "ory": 585,
360
- "os": 380,
361
- "ose": 671,
362
- "ost": 483,
363
- "ot": 343,
364
- "oth": 743,
365
- "other": 895,
366
- "ou": 299,
367
- "ough": 578,
368
- "ould": 520,
369
- "oun": 490,
370
- "ound": 613,
371
- "ount": 609,
372
- "our": 482,
373
- "ous": 499,
374
- "out": 526,
375
- "outh": 750,
376
- "ov": 466,
377
- "over": 616,
378
- "overn": 1007,
379
- "ow": 348,
380
- "ower": 981,
381
- "owever": 884,
382
- "own": 533,
383
- "oy": 686,
384
- "p": 81,
385
- "per": 537,
386
- "ph": 639,
387
- "pl": 584,
388
- "port": 653,
389
- "pp": 418,
390
- "pr": 934,
391
- "pt": 445,
392
- "q": 82,
393
- "qu": 451,
394
- "r": 83,
395
- "ra": 378,
396
- "ral": 760,
397
- "ram": 944,
398
- "ran": 540,
399
- "rans": 867,
400
- "re": 266,
401
- "reat": 746,
402
- "red": 465,
403
- "ree": 577,
404
- "ren": 656,
405
- "res": 409,
406
- "ress": 630,
407
- "ri": 401,
408
- "rib": 672,
409
- "ric": 587,
410
- "rit": 477,
411
- "ro": 292,
412
- "rom": 388,
413
- "ron": 780,
414
- "rop": 699,
415
- "ros": 871,
416
- "rou": 453,
417
- "rough": 648,
418
- "round": 654,
419
- "row": 923,
420
- "ru": 506,
421
- "ry": 552,
422
- "s": 84,
423
- "se": 463,
424
- "so": 512,
425
- "son": 925,
426
- "st": 310,
427
- "t": 85,
428
- "te": 627,
429
- "ted": 808,
430
- "ten": 899,
431
- "ter": 344,
432
- "tern": 676,
433
- "ters": 754,
434
- "th": 363,
435
- "ther": 485,
436
- "though": 974,
437
- "ton": 915,
438
- "ts": 809,
439
- "u": 86,
440
- "ual": 843,
441
- "ually": 1021,
442
- "ub": 527,
443
- "ubl": 718,
444
- "uc": 878,
445
- "uch": 996,
446
- "ud": 438,
447
- "ue": 531,
448
- "ug": 740,
449
- "ul": 370,
450
- "ular": 764,
451
- "ult": 576,
452
- "um": 391,
453
- "un": 366,
454
- "up": 975,
455
- "ur": 324,
456
- "urch": 949,
457
- "ure": 511,
458
- "ures": 959,
459
- "uring": 583,
460
- "urn": 628,
461
- "ury": 807,
462
- "us": 347,
463
- "ust": 495,
464
- "ut": 350,
465
- "ute": 997,
466
- "v": 87,
467
- "ve": 775,
468
- "ved": 731,
469
- "vel": 660,
470
- "velop": 904,
471
- "ver": 356,
472
- "w": 88,
473
- "ward": 667,
474
- "way": 747,
475
- "we": 655,
476
- "ween": 751,
477
- "wn": 638,
478
- "wo": 525,
479
- "x": 89,
480
- "y": 90,
481
- "yl": 994,
482
- "z": 91,
483
- "{": 92,
484
- "|": 93,
485
- "}": 94,
486
- "~": 95,
487
- "¡": 96,
488
- "¢": 97,
489
- "£": 98,
490
- "¤": 99,
491
- "¥": 100,
492
- "¦": 101,
493
- "§": 102,
494
- "¨": 103,
495
- "©": 104,
496
- "ª": 105,
497
- "«": 106,
498
- "¬": 107,
499
- "®": 108,
500
- "¯": 109,
501
- "°": 110,
502
- "±": 111,
503
- "²": 112,
504
- "³": 113,
505
- "´": 114,
506
- "µ": 115,
507
- "¶": 116,
508
- "·": 117,
509
- "¸": 118,
510
- "¹": 119,
511
- "º": 120,
512
- "»": 121,
513
- "¼": 122,
514
- "½": 123,
515
- "¾": 124,
516
- "¿": 125,
517
- "À": 126,
518
- "Á": 127,
519
- "Â": 128,
520
- "Ã": 129,
521
- "Ä": 130,
522
- "Å": 131,
523
- "Æ": 132,
524
- "Ç": 133,
525
- "È": 134,
526
- "É": 135,
527
- "Ê": 136,
528
- "Ë": 137,
529
- "Ì": 138,
530
- "Í": 139,
531
- "Î": 140,
532
- "Ï": 141,
533
- "Ð": 142,
534
- "Ñ": 143,
535
- "Ò": 144,
536
- "Ó": 145,
537
- "Ô": 146,
538
- "Õ": 147,
539
- "Ö": 148,
540
- "×": 149,
541
- "Ø": 150,
542
- "Ù": 151,
543
- "Ú": 152,
544
- "Û": 153,
545
- "Ü": 154,
546
- "Ý": 155,
547
- "Þ": 156,
548
- "ß": 157,
549
- "à": 158,
550
- "á": 159,
551
- "â": 160,
552
- "ã": 161,
553
- "ä": 162,
554
- "å": 163,
555
- "æ": 164,
556
- "ç": 165,
557
- "è": 166,
558
- "é": 167,
559
- "ê": 168,
560
- "ë": 169,
561
- "ì": 170,
562
- "í": 171,
563
- "î": 172,
564
- "ï": 173,
565
- "ð": 174,
566
- "ñ": 175,
567
- "ò": 176,
568
- "ó": 177,
569
- "ô": 178,
570
- "õ": 179,
571
- "ö": 180,
572
- "÷": 181,
573
- "ø": 182,
574
- "ù": 183,
575
- "ú": 184,
576
- "û": 185,
577
- "ü": 186,
578
- "ý": 187,
579
- "þ": 188,
580
- "ÿ": 189,
581
- "Ā": 190,
582
- "ā": 191,
583
- "Ă": 192,
584
- "ă": 193,
585
- "Ą": 194,
586
- "ą": 195,
587
- "Ć": 196,
588
- "ć": 197,
589
- "Ĉ": 198,
590
- "ĉ": 199,
591
- "Ċ": 200,
592
- "ċ": 201,
593
- "Č": 202,
594
- "č": 203,
595
- "Ď": 204,
596
- "ď": 205,
597
- "Đ": 206,
598
- "đ": 207,
599
- "Ē": 208,
600
- "ē": 209,
601
- "Ĕ": 210,
602
- "ĕ": 211,
603
- "Ė": 212,
604
- "ė": 213,
605
- "Ę": 214,
606
- "ę": 215,
607
- "Ě": 216,
608
- "ě": 217,
609
- "Ĝ": 218,
610
- "ĝ": 219,
611
- "Ğ": 220,
612
- "ğ": 221,
613
- "Ġ": 222,
614
- "Ġ\"": 303,
615
- "Ġ'": 332,
616
- "Ġ(": 375,
617
- "Ġ)": 374,
618
- "Ġ,": 265,
619
- "Ġ.": 273,
620
- "Ġ/": 935,
621
- "Ġ0": 860,
622
- "Ġ00": 785,
623
- "Ġ000": 866,
624
- "Ġ1": 309,
625
- "Ġ10": 877,
626
- "Ġ11": 984,
627
- "Ġ12": 901,
628
- "Ġ15": 980,
629
- "Ġ16": 929,
630
- "Ġ17": 989,
631
- "Ġ18": 573,
632
- "Ġ19": 387,
633
- "Ġ194": 1009,
634
- "Ġ198": 962,
635
- "Ġ199": 753,
636
- "Ġ2": 353,
637
- "Ġ20": 528,
638
- "Ġ200": 497,
639
- "Ġ201": 610,
640
- "Ġ3": 492,
641
- "Ġ4": 557,
642
- "Ġ5": 566,
643
- "Ġ6": 669,
644
- "Ġ7": 693,
645
- "Ġ8": 736,
646
- "Ġ9": 734,
647
- "Ġ:": 530,
648
- "Ġ;": 504,
649
- "Ġ=": 302,
650
- "Ġ@": 319,
651
- "Ġ@,@": 626,
652
- "Ġ@-@": 341,
653
- "Ġ@.@": 575,
654
- "ĠA": 304,
655
- "ĠAfter": 993,
656
- "ĠAl": 581,
657
- "ĠAmeric": 828,
658
- "ĠAn": 692,
659
- "ĠAr": 768,
660
- "ĠAs": 726,
661
- "ĠAt": 839,
662
- "ĠB": 329,
663
- "ĠBrit": 927,
664
- "ĠC": 311,
665
- "ĠCh": 489,
666
- "ĠCom": 767,
667
- "ĠD": 364,
668
- "ĠE": 402,
669
- "ĠEn": 752,
670
- "ĠEng": 875,
671
- "ĠF": 368,
672
- "ĠG": 379,
673
- "ĠH": 346,
674
- "ĠHe": 556,
675
- "ĠI": 330,
676
- "ĠIn": 421,
677
- "ĠIt": 594,
678
- "ĠJ": 393,
679
- "ĠJo": 863,
680
- "ĠJohn": 982,
681
- "ĠK": 448,
682
- "ĠL": 381,
683
- "ĠLe": 827,
684
- "ĠM": 323,
685
- "ĠMar": 745,
686
- "ĠN": 382,
687
- "ĠNew": 816,
688
- "ĠO": 403,
689
- "ĠOn": 659,
690
- "ĠP": 354,
691
- "ĠR": 361,
692
- "ĠRe": 853,
693
- "ĠS": 305,
694
- "ĠSc": 916,
695
- "ĠSe": 806,
696
- "ĠSh": 872,
697
- "ĠShe": 999,
698
- "ĠSt": 467,
699
- "ĠT": 300,
700
- "ĠTh": 564,
701
- "ĠThe": 325,
702
- "ĠThis": 859,
703
- "ĠU": 461,
704
- "ĠUn": 673,
705
- "ĠUnited": 977,
706
- "ĠV": 501,
707
- "ĠW": 376,
708
- "ĠY": 622,
709
- "ĠZ": 967,
710
- "Ġa": 260,
711
- "Ġab": 549,
712
- "Ġabout": 757,
713
- "Ġac": 836,
714
- "Ġacc": 818,
715
- "Ġact": 797,
716
- "Ġad": 550,
717
- "Ġadd": 946,
718
- "Ġafter": 614,
719
- "Ġag": 606,
720
- "Ġagain": 759,
721
- "Ġagainst": 936,
722
- "Ġal": 372,
723
- "Ġalbum": 865,
724
- "Ġall": 674,
725
- "Ġalso": 535,
726
- "Ġam": 844,
727
- "Ġan": 390,
728
- "Ġand": 289,
729
- "Ġapp": 592,
730
- "Ġar": 518,
731
- "Ġare": 454,
732
- "Ġas": 345,
733
- "Ġass": 805,
734
- "Ġat": 383,
735
- "Ġatt": 633,
736
- "Ġb": 283,
737
- "Ġback": 945,
738
- "Ġbe": 357,
739
- "Ġbec": 591,
740
- "Ġbecame": 938,
741
- "Ġbeen": 568,
742
- "Ġbefore": 870,
743
- "Ġbegan": 1023,
744
- "Ġbeing": 876,
745
- "Ġbet": 709,
746
- "Ġbetween": 772,
747
- "Ġbl": 905,
748
- "Ġboth": 921,
749
- "Ġbu": 833,
750
- "Ġbut": 509,
751
- "Ġby": 371,
752
- "Ġc": 276,
753
- "Ġcall": 862,
754
- "Ġcan": 748,
755
- "Ġcent": 688,
756
- "Ġcentury": 990,
757
- "Ġch": 434,
758
- "Ġchar": 776,
759
- "Ġcharac": 942,
760
- "Ġcl": 536,
761
- "Ġco": 831,
762
- "Ġcol": 777,
763
- "Ġcom": 405,
764
- "Ġcomm": 661,
765
- "Ġcomp": 505,
766
- "Ġcon": 377,
767
- "Ġconc": 1004,
768
- "Ġcons": 724,
769
- "Ġcont": 513,
770
- "Ġcre": 1006,
771
- "Ġd": 295,
772
- "Ġday": 856,
773
- "Ġde": 417,
774
- "Ġdec": 735,
775
- "Ġdef": 834,
776
- "Ġdep": 961,
777
- "Ġdes": 605,
778
- "Ġdesc": 950,
779
- "Ġdevelop": 998,
780
- "Ġdis": 589,
781
- "Ġdo": 868,
782
- "Ġduring": 725,
783
- "Ġe": 317,
784
- "Ġear": 717,
785
- "Ġem": 957,
786
- "Ġen": 618,
787
- "Ġend": 790,
788
- "Ġent": 885,
789
- "Ġep": 879,
790
- "Ġepis": 930,
791
- "Ġest": 992,
792
- "Ġev": 645,
793
- "Ġex": 441,
794
- "Ġexp": 939,
795
- "Ġf": 278,
796
- "Ġfe": 705,
797
- "Ġfilm": 799,
798
- "Ġfin": 650,
799
- "Ġfirst": 519,
800
- "Ġfl": 773,
801
- "Ġfollow": 838,
802
- "Ġfor": 338,
803
- "Ġform": 804,
804
- "Ġfound": 888,
805
- "Ġfour": 848,
806
- "Ġfrom": 404,
807
- "Ġg": 331,
808
- "Ġgame": 738,
809
- "Ġgo": 948,
810
- "Ġh": 296,
811
- "Ġhad": 462,
812
- "Ġhas": 563,
813
- "Ġhave": 554,
814
- "Ġhe": 395,
815
- "Ġher": 532,
816
- "Ġhigh": 887,
817
- "Ġhim": 683,
818
- "Ġhis": 406,
819
- "Ġim": 620,
820
- "Ġin": 285,
821
- "Ġinc": 503,
822
- "Ġinclud": 629,
823
- "Ġincluding": 931,
824
- "Ġind": 849,
825
- "Ġint": 514,
826
- "Ġinter": 826,
827
- "Ġinto": 651,
828
- "Ġinv": 960,
829
- "Ġis": 365,
830
- "Ġit": 386,
831
- "Ġits": 521,
832
- "Ġj": 600,
833
- "Ġk": 457,
834
- "Ġkn": 720,
835
- "Ġknown": 940,
836
- "Ġl": 314,
837
- "Ġlar": 832,
838
- "Ġlater": 788,
839
- "Ġle": 473,
840
- "Ġlead": 1014,
841
- "Ġloc": 908,
842
- "Ġlong": 972,
843
- "Ġm": 294,
844
- "Ġmade": 810,
845
- "Ġmain": 970,
846
- "Ġman": 601,
847
- "Ġmany": 969,
848
- "Ġme": 765,
849
- "Ġmet": 986,
850
- "Ġmill": 1019,
851
- "Ġmin": 995,
852
- "Ġmon": 968,
853
- "Ġmore": 668,
854
- "Ġmost": 723,
855
- "Ġmov": 822,
856
- "Ġmus": 837,
857
- "Ġn": 318,
858
- "Ġne": 771,
859
- "Ġnew": 728,
860
- "Ġno": 890,
861
- "Ġnot": 474,
862
- "Ġnum": 789,
863
- "Ġnumber": 909,
864
- "Ġo": 269,
865
- "Ġob": 1017,
866
- "Ġof": 282,
867
- "Ġoff": 619,
868
- "Ġon": 327,
869
- "Ġone": 560,
870
- "Ġonly": 733,
871
- "Ġop": 983,
872
- "Ġor": 435,
873
- "Ġorigin": 1010,
874
- "Ġother": 612,
875
- "Ġout": 637,
876
- "Ġover": 611,
877
- "Ġp": 288,
878
- "Ġpart": 604,
879
- "Ġpe": 812,
880
- "Ġper": 567,
881
- "Ġperform": 953,
882
- "Ġpl": 449,
883
- "Ġplay": 599,
884
- "Ġpo": 698,
885
- "Ġpop": 947,
886
- "Ġpos": 793,
887
- "Ġpr": 801,
888
- "Ġpre": 641,
889
- "Ġpres": 910,
890
- "Ġpro": 408,
891
- "Ġprodu": 795,
892
- "Ġpubl": 861,
893
- "Ġqu": 819,
894
- "Ġr": 422,
895
- "Ġra": 829,
896
- "Ġre": 301,
897
- "Ġrec": 553,
898
- "Ġrece": 955,
899
- "Ġrecord": 778,
900
- "Ġreg": 803,
901
- "Ġrel": 755,
902
- "Ġrele": 798,
903
- "Ġrem": 712,
904
- "Ġrep": 1003,
905
- "Ġres": 548,
906
- "Ġret": 815,
907
- "Ġrev": 1018,
908
- "Ġro": 558,
909
- "Ġs": 267,
910
- "Ġsc": 582,
911
- "Ġse": 423,
912
- "Ġseason": 758,
913
- "Ġsec": 708,
914
- "Ġsecond": 846,
915
- "Ġser": 617,
916
- "Ġseries": 907,
917
- "Ġset": 891,
918
- "Ġsever": 919,
919
- "Ġseveral": 1015,
920
- "Ġsh": 456,
921
- "Ġshe": 662,
922
- "Ġshow": 1001,
923
- "Ġsing": 951,
924
- "Ġsm": 1011,
925
- "Ġso": 813,
926
- "Ġsome": 830,
927
- "Ġsong": 711,
928
- "Ġsp": 469,
929
- "Ġspec": 817,
930
- "Ġst": 349,
931
- "Ġstud": 971,
932
- "Ġsu": 419,
933
- "Ġsub": 811,
934
- "Ġsuch": 845,
935
- "Ġsupp": 880,
936
- "Ġsur": 897,
937
- "Ġsy": 873,
938
- "Ġt": 258,
939
- "Ġte": 665,
940
- "Ġteam": 991,
941
- "Ġth": 308,
942
- "Ġthan": 766,
943
- "Ġthat": 359,
944
- "Ġthe": 262,
945
- "Ġtheir": 508,
946
- "Ġthem": 706,
947
- "Ġthen": 926,
948
- "Ġthere": 881,
949
- "Ġthey": 607,
950
- "Ġthis": 590,
951
- "Ġthree": 761,
952
- "Ġthrough": 782,
953
- "Ġtime": 646,
954
- "Ġto": 291,
955
- "Ġtr": 741,
956
- "Ġtra": 900,
957
- "Ġtrans": 941,
958
- "Ġtwo": 570,
959
- "Ġu": 487,
960
- "Ġun": 498,
961
- "Ġunder": 763,
962
- "Ġup": 642,
963
- "Ġus": 551,
964
- "Ġused": 852,
965
- "Ġv": 412,
966
- "Ġver": 787,
967
- "Ġw": 270,
968
- "Ġwas": 322,
969
- "Ġwe": 679,
970
- "Ġwell": 937,
971
- "Ġwere": 424,
972
- "Ġwh": 369,
973
- "Ġwhe": 524,
974
- "Ġwhen": 710,
975
- "Ġwhere": 820,
976
- "Ġwhich": 464,
977
- "Ġwhile": 796,
978
- "Ġwho": 580,
979
- "Ġwith": 360,
980
- "Ġwork": 694,
981
- "Ġwould": 666,
982
- "Ġwrit": 794,
983
- "Ġy": 471,
984
- "Ġyear": 608,
985
- "Ġyears": 882,
986
- "Ġâ": 455,
987
- "ĠâĢ": 460,
988
- "ĠâĢĵ": 523,
989
- "ĠâĢĶ": 943,
990
- "ĠĊ": 316,
991
- "ġ": 223,
992
- "Ģ": 224,
993
- "ģ": 225,
994
- "Ĥ": 226,
995
- "ĥ": 227,
996
- "Ħ": 228,
997
- "ħ": 229,
998
- "Ĩ": 230,
999
- "ĩ": 231,
1000
- "Ī": 232,
1001
- "ī": 233,
1002
- "Ĭ": 234,
1003
- "ĭ": 235,
1004
- "Į": 236,
1005
- "į": 237,
1006
- "İ": 238,
1007
- "ı": 239,
1008
- "IJ": 240,
1009
- "ij": 241,
1010
- "Ĵ": 242,
1011
- "ĵ": 243,
1012
- "Ķ": 244,
1013
- "ķ": 245,
1014
- "ĸ": 246,
1015
- "Ĺ": 247,
1016
- "ĺ": 248,
1017
- "Ļ": 249,
1018
- "ļ": 250,
1019
- "Ľ": 251,
1020
- "ľ": 252,
1021
- "Ŀ": 253,
1022
- "ŀ": 254,
1023
- "Ł": 255,
1024
- "ł": 256,
1025
- "Ń": 257
1026
- }
 
1
+ {"<pad>":0,"</s>":1,"!":2,"\"":3,"#":4,"$":5,"%":6,"&":7,"'":8,"(":9,")":10,"*":11,"+":12,",":13,"-":14,".":15,"/":16,"0":17,"1":18,"2":19,"3":20,"4":21,"5":22,"6":23,"7":24,"8":25,"9":26,":":27,";":28,"<":29,"=":30,">":31,"?":32,"@":33,"A":34,"B":35,"C":36,"D":37,"E":38,"F":39,"G":40,"H":41,"I":42,"J":43,"K":44,"L":45,"M":46,"N":47,"O":48,"P":49,"Q":50,"R":51,"S":52,"T":53,"U":54,"V":55,"W":56,"X":57,"Y":58,"Z":59,"[":60,"\\":61,"]":62,"^":63,"_":64,"`":65,"a":66,"b":67,"c":68,"d":69,"e":70,"f":71,"g":72,"h":73,"i":74,"j":75,"k":76,"l":77,"m":78,"n":79,"o":80,"p":81,"q":82,"r":83,"s":84,"t":85,"u":86,"v":87,"w":88,"x":89,"y":90,"z":91,"{":92,"|":93,"}":94,"~":95,"¡":96,"¢":97,"£":98,"¤":99,"¥":100,"¦":101,"§":102,"¨":103,"©":104,"ª":105,"«":106,"¬":107,"®":108,"¯":109,"°":110,"±":111,"²":112,"³":113,"´":114,"µ":115,"¶":116,"·":117,"¸":118,"¹":119,"º":120,"»":121,"¼":122,"½":123,"¾":124,"¿":125,"À":126,"Á":127,"Â":128,"Ã":129,"Ä":130,"Å":131,"Æ":132,"Ç":133,"È":134,"É":135,"Ê":136,"Ë":137,"Ì":138,"Í":139,"Î":140,"Ï":141,"Ð":142,"Ñ":143,"Ò":144,"Ó":145,"Ô":146,"Õ":147,"Ö":148,"×":149,"Ø":150,"Ù":151,"Ú":152,"Û":153,"Ü":154,"Ý":155,"Þ":156,"ß":157,"à":158,"á":159,"â":160,"ã":161,"ä":162,"å":163,"æ":164,"ç":165,"è":166,"é":167,"ê":168,"ë":169,"ì":170,"í":171,"î":172,"ï":173,"ð":174,"ñ":175,"ò":176,"ó":177,"ô":178,"õ":179,"ö":180,"÷":181,"ø":182,"ù":183,"ú":184,"û":185,"ü":186,"ý":187,"þ":188,"ÿ":189,"Ā":190,"ā":191,"Ă":192,"ă":193,"Ą":194,"ą":195,"Ć":196,"ć":197,"Ĉ":198,"ĉ":199,"Ċ":200,"ċ":201,"Č":202,"č":203,"Ď":204,"ď":205,"Đ":206,"đ":207,"Ē":208,"ē":209,"Ĕ":210,"ĕ":211,"Ė":212,"ė":213,"Ę":214,"ę":215,"Ě":216,"ě":217,"Ĝ":218,"ĝ":219,"Ğ":220,"ğ":221,"Ġ":222,"ġ":223,"Ģ":224,"ģ":225,"Ĥ":226,"ĥ":227,"Ħ":228,"ħ":229,"Ĩ":230,"ĩ":231,"Ī":232,"ī":233,"Ĭ":234,"ĭ":235,"Į":236,"į":237,"İ":238,"ı":239,"IJ":240,"ij":241,"Ĵ":242,"ĵ":243,"Ķ":244,"ķ":245,"ĸ":246,"Ĺ":247,"ĺ":248,"Ļ":249,"ļ":250,"Ľ":251,"ľ":252,"Ŀ":253,"ŀ":254,"Ł":255,"ł":256,"Ń":257,"Ġt":258,"he":259,"Ġa":260,"in":
261,"Ġthe":262,"er":263,"on":264,"Ġ,":265,"re":266,"Ġs":267,"ed":268,"Ġo":269,"Ġw":270,"nd":271,"at":272,"Ġ.":273,"or":274,"it":275,"Ġc":276,"en":277,"Ġf":278,"is":279,"es":280,"ar":281,"Ġof":282,"Ġb":283,"an":284,"Ġin":285,"al":286,"ing":287,"Ġp":288,"Ġand":289,"as":290,"Ġto":291,"ro":292,"ic":293,"Ġm":294,"Ġd":295,"Ġh":296,"ion":297,"le":298,"ou":299,"ĠT":300,"Ġre":301,"Ġ=":302,"Ġ\"":303,"ĠA":304,"ĠS":305,"ent":306,"il":307,"Ġth":308,"Ġ1":309,"st":310,"ĠC":311,"el":312,"om":313,"Ġl":314,"am":315,"ĠĊ":316,"Ġe":317,"Ġn":318,"Ġ@":319,"ad":320,"ac":321,"Ġwas":322,"ĠM":323,"ur":324,"ĠThe":325,"ec":326,"Ġon":327,"ly":328,"ĠB":329,"ĠI":330,"Ġg":331,"Ġ'":332,"et":333,"ol":334,"id":335,"iv":336,"im":337,"Ġfor":338,"ir":339,"-@":340,"Ġ@-@":341,"ig":342,"ot":343,"ter":344,"Ġas":345,"ĠH":346,"us":347,"ow":348,"Ġst":349,"ut":350,"ith":351,"ay":352,"Ġ2":353,"ĠP":354,"ation":355,"ver":356,"Ġbe":357,"her":358,"Ġthat":359,"Ġwith":360,"ĠR":361,"ce":362,"th":363,"ĠD":364,"Ġis":365,"un":366,"em":367,"ĠF":368,"Ġwh":369,"ul":370,"Ġby":371,"Ġal":372,"ch":373,"Ġ)":374,"Ġ(":375,"ĠW":376,"Ġcon":377,"ra":378,"ĠG":379,"os":380,"ĠL":381,"ĠN":382,"Ġat":383,"ers":384,"ct":385,"Ġit":386,"Ġ19":387,"rom":388,"and":389,"Ġan":390,"um":391,"est":392,"ĠJ":393,"ag":394,"Ġhe":395,"00":396,"ist":397,"ain":398,"od":399,"av":400,"ri":401,"ĠE":402,"ĠO":403,"Ġfrom":404,"Ġcom":405,"Ġhis":406,"op":407,"Ġpro":408,"res":409,"ies":410,"if":411,"Ġv":412,"ort":413,"ere":414,"ill":415,"ld":416,"Ġde":417,"pp":418,"Ġsu":419,"ore":420,"ĠIn":421,"Ġr":422,"Ġse":423,"Ġwere":424,"ew":425,"ong":426,"igh":427,"ard":428,"ate":429,"all":430,"art":431,"ak":432,"ich":433,"Ġch":434,"Ġor":435,"ab":436,"ant":437,"ud":438,"oc":439,"ber":440,"Ġex":441,"gh":442,"ity":443,"ated":444,"pt":445,"ess":446,"ear":447,"ĠK":448,"Ġpl":449,"ame":450,"qu":451,"ive":452,"rou":453,"Ġare":454,"Ġâ":455,"Ġsh":456,"Ġk":457,"ack":458,"ect":459,"ĠâĢ":460,"ĠU":461,"Ġhad":462,"se":463,"Ġwhich":464,"red":465,"ov":466,"ĠSt":467,"ast":468,"Ġsp":469,"ian":470,
"Ġy":471,"ment":472,"Ġle":473,"Ġnot":474,"ge":475,"ord":476,"rit":477,"ip":478,"ine":479,"ell":480,"ally":481,"our":482,"ost":483,"ight":484,"ther":485,"ap":486,"Ġu":487,"ish":488,"ĠCh":489,"oun":490,"ia":491,"Ġ3":492,"ave":493,"ary":494,"ust":495,"og":496,"Ġ200":497,"Ġun":498,"ous":499,"irst":500,"ĠV":501,"cc":502,"Ġinc":503,"Ġ;":504,"Ġcomp":505,"ru":506,"ions":507,"Ġtheir":508,"Ġbut":509,"ide":510,"ure":511,"so":512,"Ġcont":513,"Ġint":514,"fter":515,"ical":516,"ial":517,"Ġar":518,"Ġfirst":519,"ould":520,"Ġits":521,"hed":522,"ĠâĢĵ":523,"Ġwhe":524,"wo":525,"out":526,"ub":527,"Ġ20":528,"ff":529,"Ġ:":530,"ue":531,"Ġher":532,"own":533,"ok":534,"Ġalso":535,"Ġcl":536,"per":537,"ign":538,"ater":539,"ran":540,"orm":541,"ie":542,"ome":543,"ork":544,"ass":545,"ire":546,"end":547,"Ġres":548,"Ġab":549,"Ġad":550,"Ġus":551,"ry":552,"Ġrec":553,"Ġhave":554,"age":555,"ĠHe":556,"Ġ4":557,"Ġro":558,"mer":559,"Ġone":560,"ond":561,"low":562,"Ġhas":563,"ĠTh":564,"du":565,"Ġ5":566,"Ġper":567,"Ġbeen":568,"ime":569,"Ġtwo":570,"ence":571,"land":572,"Ġ18":573,".@":574,"Ġ@.@":575,"ult":576,"ree":577,"ough":578,"ile":579,"Ġwho":580,"ĠAl":581,"Ġsc":582,"uring":583,"pl":584,"ory":585,"ition":586,"ric":587,"ations":588,"Ġdis":589,"Ġthis":590,"Ġbec":591,"Ġapp":592,"iz":593,"ĠIt":594,"are":595,"ach":596,"lud":597,"ade":598,"Ġplay":599,"Ġj":600,"Ġman":601,"act":602,"ely":603,"Ġpart":604,"Ġdes":605,"Ġag":606,"Ġthey":607,"Ġyear":608,"ount":609,"Ġ201":610,"Ġover":611,"Ġother":612,"ound":613,"Ġafter":614,"ib":615,"over":616,"Ġser":617,"Ġen":618,"Ġoff":619,"Ġim":620,"ction":621,"ĠY":622,"ke":623,"ite":624,",@":625,"Ġ@,@":626,"te":627,"urn":628,"Ġinclud":629,"ress":630,"ance":631,"ang":632,"Ġatt":633,"ice":634,"ace":635,"ark":636,"Ġout":637,"wn":638,"ph":639,"ember":640,"Ġpre":641,"Ġup":642,"ens":643,"man":644,"Ġev":645,"Ġtime":646,"nder":647,"rough":648,"ced":649,"Ġfin":650,"Ġinto":651,"one":652,"port":653,"round":654,"we":655,"ren":656,"les":657,"int":658,"ĠOn":659,"vel":660,"Ġcomm":661,"Ġshe":662,"ason"
:663,"amp":664,"Ġte":665,"Ġwould":666,"ward":667,"Ġmore":668,"Ġ6":669,"ied":670,"ose":671,"rib":672,"ĠUn":673,"Ġall":674,"ings":675,"tern":676,"ces":677,"able":678,"Ġwe":679,"ited":680,"ever":681,"ents":682,"Ġhim":683,"ased":684,"ors":685,"oy":686,"ood":687,"Ġcent":688,"ix":689,"ase":690,"ild":691,"ĠAn":692,"Ġ7":693,"Ġwork":694,"ates":695,"ious":696,"ath":697,"Ġpo":698,"rop":699,"old":700,"als":701,"iss":702,"ey":703,"ict":704,"Ġfe":705,"Ġthem":706,"gan":707,"Ġsec":708,"Ġbet":709,"Ġwhen":710,"Ġsong":711,"Ġrem":712,"ep":713,"form":714,"ail":715,"fer":716,"Ġear":717,"ubl":718,"aw":719,"Ġkn":720,"ake":721,"aus":722,"Ġmost":723,"Ġcons":724,"Ġduring":725,"ĠAs":726,"orth":727,"Ġnew":728,"ered":729,"ilm":730,"ved":731,"att":732,"Ġonly":733,"Ġ9":734,"Ġdec":735,"Ġ8":736,"ick":737,"Ġgame":738,"ons":739,"ug":740,"Ġtr":741,"ft":742,"oth":743,"ook":744,"ĠMar":745,"reat":746,"way":747,"Ġcan":748,"ollow":749,"outh":750,"ween":751,"ĠEn":752,"Ġ199":753,"ters":754,"Ġrel":755,"ind":756,"Ġabout":757,"Ġseason":758,"Ġagain":759,"ral":760,"Ġthree":761,"ational":762,"Ġunder":763,"ular":764,"Ġme":765,"Ġthan":766,"ĠCom":767,"ĠAr":768,"hip":769,"ob":770,"Ġne":771,"Ġbetween":772,"Ġfl":773,"hn":774,"ve":775,"Ġchar":776,"Ġcol":777,"Ġrecord":778,"iew":779,"ron":780,"fore":781,"Ġthrough":782,"ision":783,"orn":784,"Ġ00":785,"ock":786,"Ġver":787,"Ġlater":788,"Ġnum":789,"Ġend":790,"olog":791,"ames":792,"Ġpos":793,"Ġwrit":794,"Ġprodu":795,"Ġwhile":796,"Ġact":797,"Ġrele":798,"Ġfilm":799,"ished":800,"Ġpr":801,"ans":802,"Ġreg":803,"Ġform":804,"Ġass":805,"ĠSe":806,"ury":807,"ted":808,"ts":809,"Ġmade":810,"Ġsub":811,"Ġpe":812,"Ġso":813,"orld":814,"Ġret":815,"ĠNew":816,"Ġspec":817,"Ġacc":818,"Ġqu":819,"Ġwhere":820,"ener":821,"Ġmov":822,"hes":823,"meric":824,"ating":825,"Ġinter":826,"ĠLe":827,"ĠAmeric":828,"Ġra":829,"Ġsome":830,"Ġco":831,"Ġlar":832,"Ġbu":833,"Ġdef":834,"bum":835,"Ġac":836,"Ġmus":837,"Ġfollow":838,"ĠAt":839,"ins":840,"ived":841,"ific":842,"ual":843,"Ġam":844,"Ġsuch":845,"Ġsecond":846,"ike":84
7,"Ġfour":848,"Ġind":849,"ann":850,"hen":851,"Ġused":852,"ĠRe":853,"ics":854,"lect":855,"Ġday":856,"iel":857,"ily":858,"ĠThis":859,"Ġ0":860,"Ġpubl":861,"Ġcall":862,"ĠJo":863,"ll":864,"Ġalbum":865,"Ġ000":866,"rans":867,"Ġdo":868,"any":869,"Ġbefore":870,"ros":871,"ĠSh":872,"Ġsy":873,"aid":874,"ĠEng":875,"Ġbeing":876,"Ġ10":877,"uc":878,"Ġep":879,"Ġsupp":880,"Ġthere":881,"Ġyears":882,"ars":883,"owever":884,"Ġent":885,"ife":886,"Ġhigh":887,"Ġfound":888,"ird":889,"Ġno":890,"Ġset":891,"ines":892,"iver":893,"io":894,"other":895,"ject":896,"Ġsur":897,"aj":898,"ten":899,"Ġtra":900,"Ġ12":901,"ised":902,"ities":903,"velop":904,"Ġbl":905,"ale":906,"Ġseries":907,"Ġloc":908,"Ġnumber":909,"Ġpres":910,"ane":911,"ause":912,"ode":913,"ek":914,"ton":915,"ĠSc":916,"ier":917,"ise":918,"Ġsever":919,"ince":920,"Ġboth":921,"ank":922,"row":923,"irect":924,"son":925,"Ġthen":926,"ĠBrit":927,"iet":928,"Ġ16":929,"Ġepis":930,"Ġincluding":931,"its":932,"igin":933,"pr":934,"Ġ/":935,"Ġagainst":936,"Ġwell":937,"Ġbecame":938,"Ġexp":939,"Ġknown":940,"Ġtrans":941,"Ġcharac":942,"ĠâĢĶ":943,"ram":944,"Ġback":945,"Ġadd":946,"Ġpop":947,"Ġgo":948,"urch":949,"Ġdesc":950,"Ġsing":951,"ield":952,"Ġperform":953,"ained":954,"Ġrece":955,"ident":956,"Ġem":957,"ert":958,"ures":959,"Ġinv":960,"Ġdep":961,"Ġ198":962,"air":963,"ern":964,"ather":965,"ful":966,"ĠZ":967,"Ġmon":968,"Ġmany":969,"Ġmain":970,"Ġstud":971,"Ġlong":972,"inn":973,"though":974,"up":975,"ool":976,"ĠUnited":977,"led":978,"ement":979,"Ġ15":980,"ower":981,"ĠJohn":982,"Ġop":983,"Ġ11":984,"ined":985,"Ġmet":986,"ober":987,"ley":988,"Ġ17":989,"Ġcentury":990,"Ġteam":991,"Ġest":992,"ĠAfter":993,"yl":994,"Ġmin":995,"uch":996,"ute":997,"Ġdevelop":998,"ĠShe":999,"iam":1000,"Ġshow":1001,"elf":1002,"Ġrep":1003,"Ġconc":1004,"ative":1005,"Ġcre":1006,"overn":1007,"ared":1008,"Ġ194":1009,"Ġorigin":1010,"Ġsm":1011,"ivers":1012,"az":1013,"Ġlead":1014,"Ġseveral":1015,"ah":1016,"Ġob":1017,"Ġrev":1018,"Ġmill":1019,"erm":1020,"ually":1021,"oot":1022,"Ġbegan":1023}