TheRootOf3's picture
Upload folder using huggingface_hub
a4cd388 verified
{
" ": 50959,
" !|": 50684,
" \"\",": 50713,
" \"\".": 50839,
" (,": 50383,
" (;": 50386,
" 1812": 51073,
" 1820": 51111,
" 1832": 51188,
" 1835": 51185,
" 1836": 51173,
" 1837": 51108,
" 1838": 51156,
" 1839": 51190,
" 1841": 51162,
" 1842": 51213,
" 1843": 51166,
" 1844": 51161,
" 1845": 51086,
" 1846": 51089,
" 1847": 51050,
" 1848": 50852,
" 1849": 51049,
" 1851": 50991,
" 1852": 50984,
" 1853": 51023,
" 1854": 50914,
" 1855": 50925,
" 1856": 50972,
" 1857": 50874,
" 1858": 50915,
" 1859": 50868,
" 1864": 50700,
" 1866": 50791,
" 1867": 50773,
" 1868": 50792,
" 1869": 50825,
" 1871": 50742,
" 1872": 50774,
" 1873": 50789,
" 1874": 50787,
" 1875": 50765,
" 1876": 50745,
" 1877": 50788,
" 1878": 50757,
" 1879": 50768,
" 1881": 50722,
" 1882": 50724,
" 1883": 50736,
" 1884": 50707,
" 1885": 50667,
" 1887": 50686,
" 1891": 50666,
" 1892": 50654,
" 1894": 50642,
" 2023": 50971,
" APC": 50997,
" ATTENTION": 50485,
" Abb": 50461,
" Academ": 50331,
" Achie": 51038,
" Addition": 50497,
" Adela": 50694,
" Agre": 51043,
" Agricult": 50597,
" Alab": 50528,
" Albanian": 50884,
" Alber": 50526,
" Albums": 50782,
" Aleks": 51021,
" Amaz": 50672,
" André": 50817,
" Angl": 50645,
" Anglican": 50754,
" Artillery": 50819,
" Atlant": 50387,
" Bagh": 51112,
" Ballet": 51181,
" Banglades": 50530,
" Barcel": 50635,
" Baronet": 50805,
" Batter": 51109,
" Bavaria": 51082,
" Bea": 50987,
" Belf": 50958,
" Belgrade": 50957,
" Bengali": 51257,
" Bey": 50691,
" Biography": 50362,
" Bla": 50476,
" Blo": 50688,
" Bonn": 50938,
" Bruns": 50806,
" Buhari": 50815,
" Bulld": 51157,
" CDP": 50939,
" COVID": 50494,
" Canton": 50869,
" Carne": 51066,
" Categ": 51047,
" Cauc": 51045,
" Cavalry": 50898,
" Ceramby": 50935,
" Cerambycidae": 50943,
" Cinc": 50692,
" Clif": 50940,
" Colle": 50838,
" Commer": 50554,
" Comple": 50661,
" Concer": 50830,
" Congreg": 51115,
" Constitu": 51069,
" Controvers": 51211,
" Copa": 50933,
" Cypr": 50904,
" Czechoslov": 50883,
" Darl": 51127,
" Demographics": 50543,
" Deuts": 51012,
" Diam": 50717,
" Diocese": 50640,
" Dipl": 51042,
" Distinguished": 51029,
" División": 51061,
" Doubles": 51205,
" Dougl": 50515,
" Eis": 50964,
" Eliz": 50413,
" Emp": 50360,
" Epis": 50970,
" Estonian": 51037,
" Etymology": 50983,
" Eurovision": 50947,
" Examp": 50809,
" Excell": 51206,
" Exped": 50835,
" Ferdin": 50855,
" Ferg": 51001,
" Fergus": 51085,
" Founded": 50857,
" Francesco": 51253,
" Fras": 50917,
" Frid": 50562,
" Gaelic": 51104,
" García": 51176,
" Gav": 51141,
" Geography": 50404,
" Geor": 50300,
" Georges": 51216,
" Gloucester": 50896,
" Gmina": 50525,
" Grammar": 51152,
" Gymn": 50965,
" Gó": 51186,
" Harv": 50462,
" Heinrich": 51120,
" Hern": 51046,
" Herzegovina": 50976,
" Histor": 50353,
" Honours": 50665,
" Hous": 50870,
" Independ": 50436,
" Indoor": 50999,
" Ira": 50448,
" Isab": 50818,
" Kann": 50998,
" Karnataka": 51183,
" Kras": 51218,
" Lagos": 50807,
" Lancashire": 50912,
" Legit": 50351,
" Leip": 51140,
" Leipzig": 51197,
" Leop": 50992,
" Lig": 50897,
" Lithuanian": 51160,
" Lut": 51136,
" Luxemb": 50923,
" Lé": 51117,
" MacD": 51222,
" Madh": 51128,
" Malay": 50778,
" Marl": 51135,
" Marri": 50986,
" María": 50934,
" Mater": 51231,
" Merced": 51133,
" Mey": 50708,
" Midd": 50843,
" Mikh": 51123,
" Minn": 50415,
" Moham": 50619,
" Mugabe": 51075,
" Muham": 50547,
" Municipality": 50564,
" Myan": 51039,
" Mā": 51096,
" NAIJ": 51098,
" Nacional": 51058,
" Napole": 50816,
" Natal": 51054,
" Niel": 51094,
" Nigerians": 50876,
" Nol": 51143,
" Northampton": 51254,
" Notable": 50452,
" Oblast": 50551,
" Oly": 50310,
" Outstanding": 50974,
" PDP": 51027,
" Pab": 51122,
" Paralymp": 50659,
" Paralympics": 50856,
" Patric": 51125,
" Phoen": 50689,
" Pitts": 50553,
" Pla": 50350,
" Pomer": 50929,
" Prad": 50578,
" Prefecture": 50932,
" Princip": 50685,
" Profess": 50358,
" Prote": 50569,
" Pseud": 51210,
" Publ": 50643,
" Pé": 50996,
" Railways": 50796,
" Raja": 51248,
" Ral": 50719,
" Rang": 50541,
" Reception": 50477,
" Rever": 51204,
" Rhine": 51072,
" Rif": 50848,
" Rol": 50618,
" Romanized": 50606,
" Rovers": 50891,
" Rup": 51243,
" Sacram": 51114,
" Sain": 50593,
" Santo": 51235,
" Saxony": 51240,
" Seminary": 51100,
" Sene": 51192,
" Serb": 50445,
" Sey": 50812,
" Shakes": 50893,
" Sib": 51132,
" Singles": 50561,
" Slovak": 51163,
" Spen": 50793,
" Stakes": 50837,
" Subsequently": 51084,
" Sustain": 50886,
" Swimming": 51017,
" Synopsis": 51146,
" São": 50718,
" Taxonomy": 51087,
" Teh": 50927,
" Telugu": 51195,
" Theod": 50682,
" Thir": 50752,
" Tod": 50418,
" Tourn": 50471,
" Treas": 50846,
" UCI": 51165,
" Underg": 51238,
" Urugu": 50859,
" Vat": 51113,
" Venez": 50594,
" Verm": 50638,
" Vinc": 50589,
" Voiv": 50460,
" Voivodeship": 50467,
" Warri": 50750,
" Wednes": 50710,
" Wimb": 51228,
" Yank": 50826,
" YouT": 50628,
" aband": 50449,
" abbrev": 50920,
" absol": 50794,
" adap": 50535,
" agricult": 50428,
" airfield": 51024,
" alcoh": 50625,
" alumin": 51053,
" anc": 50363,
" announc": 50963,
" appoin": 50313,
" apprec": 50775,
" apprent": 51011,
" arran": 50397,
" asp": 50427,
" attra": 50488,
" avo": 50785,
" bassist": 50953,
" battalions": 51233,
" becom": 50335,
" bey": 50508,
" bord": 50853,
" bordered": 51150,
" botan": 50941,
" bran": 50573,
" canc": 50613,
" canton": 50956,
" carri": 50479,
" catal": 50693,
" cerem": 50430,
" codice": 51025,
" colla": 50346,
" commune": 50495,
" competes": 51201,
" compla": 50731,
" complet": 50399,
" composers": 51142,
" compris": 50492,
" concer": 50394,
" confron": 50954,
" cong": 51044,
" conne": 50435,
" consid": 50303,
" consort": 51202,
" consp": 50711,
" constitut": 50704,
" constru": 50308,
" controll": 50760,
" cosm": 51007,
" craf": 51116,
" cricketer": 50650,
" danc": 50675,
" dang": 50657,
" datab": 50766,
" daugh": 50325,
" decla": 50405,
" declar": 51000,
" degrad": 51234,
" deliv": 50545,
" demonst": 50559,
" determ": 50365,
" dia": 50457,
" dif": 50287,
" diocese": 50924,
" dipl": 50469,
" disagre": 51081,
" disch": 50763,
" discip": 50663,
" discography": 51083,
" displa": 50568,
" doctorate": 50899,
" draf": 50616,
" earl": 50336,
" eigh": 50762,
" els": 50771,
" emigrated": 51064,
" emp": 50320,
" encoura": 50503,
" encourag": 51078,
" enfor": 50549,
" engra": 51159,
" enj": 50470,
" enl": 50557,
" ere": 50539,
" examp": 50323,
" excell": 50651,
" exchang": 50995,
" exclud": 50744,
" expla": 50406,
" extens": 50626,
" fier": 51230,
" financ": 51209,
" fla": 51068,
" forewings": 51207,
" freestyle": 50770,
" fron": 51026,
" gastropod": 50723,
" genera": 50926,
" geomet": 50740,
" gla": 51052,
" gradu": 50324,
" guar": 50764,
" hamlet": 50908,
" hap": 50453,
" headqu": 50407,
" herit": 50581,
" honours": 50936,
" householder": 50810,
" ille": 50536,
" immigr": 50558,
" impr": 50533,
" inaugurated": 51070,
" inducted": 50761,
" insc": 50592,
" inse": 50674,
" insp": 50379,
" inspe": 51227,
" institut": 50429,
" instru": 50523,
" interp": 50450,
" interse": 50518,
" irreg": 50980,
" journ": 50498,
" judg": 50781,
" libr": 50746,
" locomot": 50519,
" locomotive": 50844,
" locomotives": 50715,
" manor": 51013,
" marri": 51101,
" memb": 50905,
" merg": 50402,
" moll": 50664,
" mollusk": 50981,
" moths": 51252,
" nam": 50799,
" negot": 50501,
" nickn": 50577,
" ninet": 50730,
" nomin": 50355,
" nov": 50327,
" obl": 51015,
" orch": 51178,
" orchest": 50889,
" organiz": 50401,
" pandemic": 50566,
" parishes": 51196,
" partn": 50403,
" passeng": 50410,
" performan": 50322,
" pianist": 50833,
" pilgrim": 51134,
" pla": 50265,
" plann": 50359,
" playwright": 51129,
" poin": 50296,
" portfol": 51147,
" portra": 50811,
" posthum": 51048,
" predecess": 50649,
" premi": 50583,
" produc": 50330,
" promin": 50780,
" prosp": 50772,
" publ": 50271,
" purp": 50389,
" rebu": 50544,
" referend": 50841,
" regiments": 51225,
" reinfor": 50907,
" rejoin": 51137,
" remn": 51124,
" remov": 50373,
" reop": 50850,
" repla": 50314,
" resol": 50827,
" ri": 50534,
" rif": 50636,
" sacrif": 51032,
" samp": 50506,
" schem": 51110,
" schol": 50514,
" screenwriter": 51154,
" sculptor": 50951,
" seg": 50510,
" sele": 50339,
" shrub": 50824,
" sla": 50422,
" snails": 51059,
" soci": 50571,
" songwriter": 50749,
" spok": 50489,
" subd": 50624,
" subfamily": 50767,
" subsidi": 50610,
" subspecies": 50985,
" subtropical": 51092,
" suppor": 50511,
" surre": 50614,
" surrend": 51028,
" swimmer": 51242,
" swit": 50702,
" terminus": 50681,
" theore": 50582,
" thir": 50473,
" tonn": 51194,
" tourn": 50334,
" transp": 50863,
" trav": 50512,
" treas": 51106,
" tribut": 50560,
" tributary": 50648,
" trigg": 51138,
" troubl": 51093,
" underst": 50490,
" unexp": 50955,
" unincorporated": 50605,
" unsuccess": 50588,
" upgrad": 50620,
" urg": 50603,
" variab": 51232,
" vess": 50455,
" villa": 50289,
" vocalist": 50777,
" warri": 51006,
" wick": 50660,
" wickets": 51010,
" wides": 50615,
" wingspan": 50977,
" winn": 50368,
" worksh": 50690,
" Á": 50695,
" Ç": 51107,
" Č": 51005,
" Đ": 51022,
" İ": 51189,
" Ł": 50747,
" Ś": 51091,
" Š": 50669,
" Ž": 51090,
" ‘": 50468,
" “": 50306,
"\"\".": 51182,
",”": 50585,
".\\": 51199,
".”": 50438,
"2021": 51067,
"::::\"": 50487,
"Amer": 50366,
"Billboard": 50507,
"CAA": 50437,
"EAD": 50584,
"Geor": 50822,
"Ital": 51035,
"Jos": 51148,
"Rol": 51220,
"SCO": 51071,
"TENT": 50482,
"TENTION": 50484,
"acional": 50877,
"adio": 50311,
"agre": 50677,
"airman": 50374,
"akk": 50982,
"alymp": 50646,
"ambig": 50580,
"ambiguation": 50595,
"amby": 50911,
"amese": 50705,
"amics": 50714,
"ampionship": 50291,
"ancell": 50641,
"ancellor": 50696,
"andin": 51153,
"anner": 50464,
"anning": 50491,
"anskrit": 51219,
"appe": 50769,
"appeared": 50871,
"aptist": 50598,
"apur": 50804,
"areer": 50276,
"arged": 50709,
"arked": 50697,
"arliam": 50326,
"arliament": 50328,
"arly": 50270,
"arri": 50279,
"arriage": 50378,
"arried": 50301,
"arrier": 50734,
"artered": 50779,
"artet": 50946,
"arting": 50671,
"arv": 50678,
"arvae": 50840,
"asan": 50612,
"ashire": 50894,
"asium": 51014,
"atab": 50662,
"atty": 51105,
"auf": 51198,
"auge": 50721,
"autiful": 50521,
"auty": 50611,
"avalry": 50556,
"avel": 50629,
"avian": 50832,
"avirus": 50990,
"bidae": 51079,
"brahim": 50973,
"brev": 50784,
"canic": 51051,
"ceae": 50994,
"celand": 50658,
"cement": 50392,
"chang": 50609,
"chestra": 50465,
"chez": 51018,
"cidae": 50885,
"cio": 51229,
"clesiast": 51118,
"clesiastical": 51179,
"coh": 50587,
"compass": 50741,
"cted": 50269,
"ctober": 50286,
"ctoral": 50412,
"ctorate": 50910,
"cts": 50480,
"designated": 51217,
"dess": 50872,
"died": 50733,
"diocese": 51239,
"dire": 50821,
"disambiguation": 50599,
"dorf": 50902,
"duce": 50442,
"duced": 50292,
"ducer": 51130,
"duces": 50890,
"ducing": 50555,
"ducted": 50670,
"ductive": 51126,
"eastern": 50845,
"ebru": 50293,
"ebruary": 50294,
"ecess": 50364,
"eck": 50338,
"ecting": 50496,
"ections": 51177,
"ects": 50375,
"ecut": 50312,
"ecution": 50516,
"ecutive": 50333,
"edding": 50579,
"eech": 50505,
"eep": 50340,
"eld": 50451,
"eleb": 50349,
"eler": 50531,
"elic": 51245,
"ellers": 50945,
"ellite": 50563,
"ely": 50267,
"emann": 51255,
"embers": 50295,
"emble": 50567,
"embly": 50337,
"emeter": 50447,
"emor": 50344,
"emorial": 50398,
"empor": 51172,
"eneral": 50309,
"enesis": 50922,
"enheim": 51149,
"enia": 50633,
"ennium": 51099,
"eo": 50656,
"eppe": 51121,
"eptember": 50285,
"erals": 51060,
"eras": 50790,
"erbai": 50572,
"erner": 51164,
"ersey": 50393,
"erted": 50673,
"erts": 51019,
"esota": 50454,
"estion": 51158,
"estock": 51174,
"estrian": 50930,
"ething": 50443,
"etwork": 50321,
"ević": 51215,
"evo": 51033,
"ewhere": 50680,
"ewise": 51214,
"families": 50864,
"ferences": 50411,
"fers": 50630,
"fess": 50281,
"fic": 50342,
"ficult": 50385,
"fielder": 50542,
"footballer": 50540,
"forman": 50316,
"fra": 50966,
"gend": 50423,
"gether": 50329,
"getown": 51237,
"habilit": 50944,
"hanced": 51063,
"hausen": 51236,
"heme": 50600,
"hev": 50829,
"histor": 50952,
"iab": 50499,
"iast": 50786,
"ibn": 50729,
"icine": 50426,
"icul": 51088,
"iec": 50441,
"ieces": 50493,
"ieuten": 50419,
"ieutenant": 50424,
"iffer": 50854,
"ifically": 50483,
"ifican": 50652,
"ificance": 50655,
"iformes": 51057,
"iforn": 50317,
"ifornia": 50318,
"igade": 50444,
"iggest": 50548,
"igital": 50390,
"ignated": 50834,
"ilarly": 50621,
"ilding": 50417,
"illiant": 51244,
"ilm": 50268,
"imensions": 51020,
"inae": 50644,
"inced": 50865,
"incip": 50347,
"includ": 50431,
"incorporated": 50591,
"inded": 51250,
"inental": 50676,
"inist": 50627,
"innati": 50699,
"inned": 51009,
"inum": 50738,
"ipur": 50937,
"iracy": 50836,
"irical": 51175,
"iscount": 51249,
"isión": 50931,
"islav": 51004,
"istory": 50275,
"itively": 51169,
"itro": 50813,
"ivision": 50297,
"ión": 50867,
"ić": 50367,
"ič": 50962,
"jord": 51155,
"kal": 51168,
"lac": 50538,
"lach": 51065,
"lacier": 50748,
"lack": 50298,
"lades": 50517,
"laimed": 50601,
"lair": 50900,
"lant": 50356,
"lave": 51170,
"lax": 50800,
"ledon": 51151,
"legiate": 50882,
"lend": 50596,
"leyball": 50604,
"licated": 50758,
"lications": 50668,
"licit": 50703,
"linary": 51055,
"loy": 50305,
"loyd": 50550,
"lywood": 50472,
"mina": 50486,
"minton": 51180,
"miral": 50602,
"mphony": 50639,
"mpt": 50631,
"nal": 50851,
"ndez": 50906,
"nez": 51031,
"nold": 50575,
"nolds": 51036,
"nowiki": 50753,
"ntario": 50434,
"née": 50687,
"occer": 50432,
"ociety": 50315,
"odeship": 50463,
"odia": 50892,
"oeing": 50901,
"oints": 50913,
"olished": 50509,
"ollowing": 50348,
"omat": 51203,
"omber": 50950,
"ommended": 50433,
"ommission": 51241,
"omot": 50466,
"onald": 50481,
"onlyinclude": 50647,
"onn": 50369,
"onnell": 51200,
"onsin": 50459,
"ootball": 50274,
"oque": 51008,
"ordin": 50475,
"orey": 50895,
"orms": 51171,
"orning": 50425,
"osely": 51256,
"osex": 50802,
"osm": 50565,
"osoph": 50416,
"osopher": 50756,
"osophical": 50975,
"osophy": 50478,
"ospitals": 50726,
"osy": 50820,
"osystem": 51131,
"otan": 50637,
"oucester": 50881,
"ounc": 50299,
"ouncil": 50302,
"ouncill": 50706,
"ouns": 51095,
"ounsel": 50683,
"ounted": 51003,
"ountry": 50921,
"oura": 50408,
"ourag": 50968,
"ourg": 50801,
"overn": 50272,
"overnment": 50278,
"overnor": 50332,
"oviet": 50352,
"ović": 50522,
"ovsk": 51226,
"ová": 51258,
"owever": 50280,
"owo": 50875,
"oyalty": 51191,
"oye": 50653,
"peare": 50888,
"pective": 50622,
"phab": 50847,
"pla": 50860,
"politician": 50988,
"poration": 50400,
"pow": 50823,
"pril": 50288,
"quir": 50727,
"quiry": 50909,
"quis": 51251,
"racks": 50849,
"racy": 50537,
"rague": 50737,
"randed": 50878,
"rapped": 50916,
"rapy": 50617,
"raska": 50679,
"rass": 50720,
"reater": 50395,
"reland": 50343,
"rena": 50590,
"retary": 50354,
"rews": 51223,
"ricket": 50396,
"ricketer": 50546,
"ricts": 50446,
"ricult": 50380,
"riculum": 50861,
"rif": 50421,
"rific": 50948,
"riptions": 50716,
"rizona": 50500,
"roat": 51184,
"roke": 50524,
"rong": 50574,
"ronze": 50439,
"ropical": 50414,
"ropod": 50634,
"roscop": 50979,
"rud": 50989,
"ryst": 50520,
"rystal": 50698,
"rí": 51080,
"seud": 50529,
"shan": 51002,
"ska": 50701,
"songwriter": 50808,
"sor": 50732,
"stance": 50743,
"stances": 50608,
"stit": 50277,
"stitu": 50887,
"stitut": 50361,
"stitute": 50319,
"stitutes": 50949,
"stitution": 50388,
"stitutional": 50725,
"surname": 50969,
"tage": 51016,
"teg": 50371,
"tend": 50284,
"tended": 50304,
"teneg": 50928,
"tenham": 51074,
"tense": 50831,
"terbury": 50903,
"ternational": 50283,
"ternet": 50456,
"thlet": 50341,
"thlete": 50712,
"thm": 50623,
"tingham": 50803,
"titled": 51167,
"tlement": 50842,
"took": 50978,
"ture": 50570,
"tym": 50751,
"ubl": 50266,
"ublican": 50382,
"ublin": 50502,
"uccess": 50504,
"uchy": 50961,
"uerto": 50527,
"ugby": 50372,
"ugust": 50282,
"uis": 50739,
"uisine": 51224,
"ularly": 50345,
"umbent": 50632,
"umberland": 50866,
"unar": 51077,
"unior": 50377,
"unnel": 50586,
"urday": 50513,
"urname": 50381,
"uros": 50858,
"usband": 50370,
"usic": 50273,
"ussels": 50828,
"usz": 51246,
"uxiliary": 51041,
"vanced": 50879,
"veill": 51103,
"veillance": 51119,
"veloped": 51034,
"velopment": 50391,
"vement": 50409,
"vements": 50552,
"vens": 50797,
"viv": 51187,
"wara": 51247,
"warf": 51208,
"weep": 50795,
"yal": 50440,
"ych": 50993,
"ycl": 51102,
"ydro": 50420,
"ygen": 50755,
"yler": 50783,
"ylvan": 50376,
"ylvania": 50384,
"ymn": 50474,
"zegov": 50918,
"zegovina": 50942,
"zil": 50357,
"ález": 51259,
"ández": 51097,
"ár": 50862,
"âte": 51212,
"är": 50873,
"ès": 51062,
"éd": 51221,
"él": 50759,
"ém": 51040,
"ód": 51030,
"ów": 50458,
"ør": 50919,
"ün": 50798,
"āb": 51145,
"ād": 51139,
"ān": 50814,
"ław": 50735,
"ń": 50532,
"ř": 50880,
"ś": 50776,
"ź": 51076,
"ż": 51056,
"ž": 50607,
"ș": 50576,
"ț": 50960,
"ə": 51144,
"’": 50290,
"“": 51193,
"”": 50307,
"”,": 50967,
"”.": 50728
}