Muennighoff's picture
Add MTEB evaluation
154c4e9
{
"test": {
"af": {
"accuracy": 0.4054808338937458,
"accuracy_stderr": 0.014447259403752678,
"f1": 0.39490307545239717,
"f1_stderr": 0.009212717959217511,
"main_score": 0.4054808338937458
},
"am": {
"accuracy": 0.2418291862811029,
"accuracy_stderr": 0.012904896103629838,
"f1": 0.23437620034727474,
"f1_stderr": 0.009361920647479921,
"main_score": 0.2418291862811029
},
"ar": {
"accuracy": 0.30134498991257563,
"accuracy_stderr": 0.014810775844007782,
"f1": 0.28787175191531283,
"f1_stderr": 0.012174439394246464,
"main_score": 0.30134498991257563
},
"az": {
"accuracy": 0.35884330867518494,
"accuracy_stderr": 0.012717802447884442,
"f1": 0.36264500398782124,
"f1_stderr": 0.010633859333577812,
"main_score": 0.35884330867518494
},
"bn": {
"accuracy": 0.2917283120376597,
"accuracy_stderr": 0.011891186572931844,
"f1": 0.278101616531901,
"f1_stderr": 0.010627299989024891,
"main_score": 0.2917283120376597
},
"cy": {
"accuracy": 0.41788836583725625,
"accuracy_stderr": 0.015348362273085753,
"f1": 0.39714131810548015,
"f1_stderr": 0.009372174520376655,
"main_score": 0.41788836583725625
},
"da": {
"accuracy": 0.44176193678547404,
"accuracy_stderr": 0.00924625540944034,
"f1": 0.4219249982655229,
"f1_stderr": 0.008618307806061426,
"main_score": 0.44176193678547404
},
"de": {
"accuracy": 0.4207464694014795,
"accuracy_stderr": 0.006478416557311502,
"f1": 0.39441882591831623,
"f1_stderr": 0.008645545638557534,
"main_score": 0.4207464694014795
},
"el": {
"accuracy": 0.362542030934768,
"accuracy_stderr": 0.012041979440806665,
"f1": 0.3446592715936761,
"f1_stderr": 0.009835042341951889,
"main_score": 0.362542030934768
},
"en": {
"accuracy": 0.6140887693342301,
"accuracy_stderr": 0.015504655249298095,
"f1": 0.5979854802683996,
"f1_stderr": 0.01208669884455989,
"main_score": 0.6140887693342301
},
"es": {
"accuracy": 0.42679892400806996,
"accuracy_stderr": 0.015347073096769526,
"f1": 0.4204801248338172,
"f1_stderr": 0.010741334091620194,
"main_score": 0.42679892400806996
},
"evaluation_time": 2374.32,
"fa": {
"accuracy": 0.3559179556153329,
"accuracy_stderr": 0.009888477522743777,
"f1": 0.34045862930486165,
"f1_stderr": 0.0084840956345157,
"main_score": 0.3559179556153329
},
"fi": {
"accuracy": 0.40036987222595827,
"accuracy_stderr": 0.013803399246107682,
"f1": 0.3811770343936278,
"f1_stderr": 0.01028587503250668,
"main_score": 0.40036987222595827
},
"fr": {
"accuracy": 0.4343981170141224,
"accuracy_stderr": 0.012246472905114743,
"f1": 0.42708438898786494,
"f1_stderr": 0.009512925256512221,
"main_score": 0.4343981170141224
},
"he": {
"accuracy": 0.3159381304640215,
"accuracy_stderr": 0.014973549858983267,
"f1": 0.2998550522450782,
"f1_stderr": 0.011727646762679908,
"main_score": 0.3159381304640215
},
"hi": {
"accuracy": 0.27044384667114996,
"accuracy_stderr": 0.007563774725762617,
"f1": 0.27313059184832666,
"f1_stderr": 0.008056028697421164,
"main_score": 0.27044384667114996
},
"hu": {
"accuracy": 0.38453261600538,
"accuracy_stderr": 0.01598445677659242,
"f1": 0.37309189326110437,
"f1_stderr": 0.010195896901809987,
"main_score": 0.38453261600538
},
"hy": {
"accuracy": 0.2797915265635508,
"accuracy_stderr": 0.015095564553015866,
"f1": 0.27430939684346445,
"f1_stderr": 0.011012889120934774,
"main_score": 0.2797915265635508
},
"id": {
"accuracy": 0.4397108271687963,
"accuracy_stderr": 0.017542356737953325,
"f1": 0.43405857056887615,
"f1_stderr": 0.010528016012937845,
"main_score": 0.4397108271687963
},
"is": {
"accuracy": 0.40302622730329524,
"accuracy_stderr": 0.012582306948623233,
"f1": 0.39108052180520747,
"f1_stderr": 0.009992598454055583,
"main_score": 0.40302622730329524
},
"it": {
"accuracy": 0.45474108944182917,
"accuracy_stderr": 0.01340459754818702,
"f1": 0.4585950328241134,
"f1_stderr": 0.010236110188058374,
"main_score": 0.45474108944182917
},
"ja": {
"accuracy": 0.4560860793544048,
"accuracy_stderr": 0.010590674677296958,
"f1": 0.4394920708216737,
"f1_stderr": 0.010641769554373246,
"main_score": 0.4560860793544048
},
"jv": {
"accuracy": 0.386684599865501,
"accuracy_stderr": 0.012145936599746584,
"f1": 0.37699003401885905,
"f1_stderr": 0.00943165160039381,
"main_score": 0.386684599865501
},
"ka": {
"accuracy": 0.25652320107599197,
"accuracy_stderr": 0.008190163090884097,
"f1": 0.25279084273189584,
"f1_stderr": 0.00943958002987095,
"main_score": 0.25652320107599197
},
"km": {
"accuracy": 0.28295225285810355,
"accuracy_stderr": 0.011478062668929577,
"f1": 0.2664582563877155,
"f1_stderr": 0.007309349177116762,
"main_score": 0.28295225285810355
},
"kn": {
"accuracy": 0.23480161398789506,
"accuracy_stderr": 0.013124898182906054,
"f1": 0.22275241866506734,
"f1_stderr": 0.009107560102876623,
"main_score": 0.23480161398789506
},
"ko": {
"accuracy": 0.3655682582380632,
"accuracy_stderr": 0.0138514372378647,
"f1": 0.3600475317106361,
"f1_stderr": 0.012525556508818685,
"main_score": 0.3655682582380632
},
"lv": {
"accuracy": 0.4184936112979153,
"accuracy_stderr": 0.01503834658744963,
"f1": 0.4138932672359119,
"f1_stderr": 0.009112866608871466,
"main_score": 0.4184936112979153
},
"ml": {
"accuracy": 0.2490921318090114,
"accuracy_stderr": 0.011318911223656808,
"f1": 0.23968687483768808,
"f1_stderr": 0.009623239538185528,
"main_score": 0.2490921318090114
},
"mn": {
"accuracy": 0.2986213853396099,
"accuracy_stderr": 0.013066535467207235,
"f1": 0.2997715207525541,
"f1_stderr": 0.012860089685643984,
"main_score": 0.2986213853396099
},
"ms": {
"accuracy": 0.4242098184263618,
"accuracy_stderr": 0.013890887562095782,
"f1": 0.4150877432664628,
"f1_stderr": 0.012091881563068337,
"main_score": 0.4242098184263618
},
"my": {
"accuracy": 0.25131136516476127,
"accuracy_stderr": 0.0075325439284173994,
"f1": 0.23938932214086775,
"f1_stderr": 0.006306918181473474,
"main_score": 0.25131136516476127
},
"nb": {
"accuracy": 0.3981506388702084,
"accuracy_stderr": 0.013968199431054802,
"f1": 0.3880958658779166,
"f1_stderr": 0.011576475125850125,
"main_score": 0.3981506388702084
},
"nl": {
"accuracy": 0.4362138533960995,
"accuracy_stderr": 0.01079982721922183,
"f1": 0.4201386842914633,
"f1_stderr": 0.009992268819898372,
"main_score": 0.4362138533960995
},
"pl": {
"accuracy": 0.4219569603227976,
"accuracy_stderr": 0.01868909945354249,
"f1": 0.4000556559825827,
"f1_stderr": 0.012543479799886282,
"main_score": 0.4219569603227976
},
"pt": {
"accuracy": 0.4520847343644923,
"accuracy_stderr": 0.01543976511380644,
"f1": 0.44241150050290506,
"f1_stderr": 0.008326917982409131,
"main_score": 0.4520847343644923
},
"ro": {
"accuracy": 0.4180901143241426,
"accuracy_stderr": 0.016105657510711916,
"f1": 0.40474074848670083,
"f1_stderr": 0.015837216995188204,
"main_score": 0.4180901143241426
},
"ru": {
"accuracy": 0.3596839273705447,
"accuracy_stderr": 0.019516291427541597,
"f1": 0.35095456843621,
"f1_stderr": 0.012440228318941022,
"main_score": 0.3596839273705447
},
"sl": {
"accuracy": 0.40605245460659045,
"accuracy_stderr": 0.01602119555635458,
"f1": 0.39302383051500134,
"f1_stderr": 0.0108929281580567,
"main_score": 0.40605245460659045
},
"sq": {
"accuracy": 0.42757229320780094,
"accuracy_stderr": 0.013061126335718017,
"f1": 0.41537639314973884,
"f1_stderr": 0.011214750874227073,
"main_score": 0.42757229320780094
},
"sv": {
"accuracy": 0.42347007397444514,
"accuracy_stderr": 0.01443766953082292,
"f1": 0.41043660179486263,
"f1_stderr": 0.009119701786380115,
"main_score": 0.42347007397444514
},
"sw": {
"accuracy": 0.4112306657700067,
"accuracy_stderr": 0.016030226034380948,
"f1": 0.39712940473289027,
"f1_stderr": 0.01079211644663987,
"main_score": 0.4112306657700067
},
"ta": {
"accuracy": 0.24603227975790182,
"accuracy_stderr": 0.007348449965253495,
"f1": 0.23969236788828607,
"f1_stderr": 0.00842584449511441,
"main_score": 0.24603227975790182
},
"te": {
"accuracy": 0.2503698722259583,
"accuracy_stderr": 0.010974450116174157,
"f1": 0.2437196123281459,
"f1_stderr": 0.007768870065899431,
"main_score": 0.2503698722259583
},
"th": {
"accuracy": 0.35400134498991254,
"accuracy_stderr": 0.012277223814879825,
"f1": 0.35063600413688034,
"f1_stderr": 0.008031998429326455,
"main_score": 0.35400134498991254
},
"tl": {
"accuracy": 0.4119031607262945,
"accuracy_stderr": 0.014317194926727485,
"f1": 0.4024043230427301,
"f1_stderr": 0.009308382803276337,
"main_score": 0.4119031607262945
},
"tr": {
"accuracy": 0.3640551445864156,
"accuracy_stderr": 0.01158473822441319,
"f1": 0.3603844992856558,
"f1_stderr": 0.011004967374166683,
"main_score": 0.3640551445864156
},
"ur": {
"accuracy": 0.25934767989240076,
"accuracy_stderr": 0.011556400737346494,
"f1": 0.252074457023531,
"f1_stderr": 0.00828687176833062,
"main_score": 0.25934767989240076
},
"vi": {
"accuracy": 0.38799596503026224,
"accuracy_stderr": 0.012823731186170102,
"f1": 0.37160233794673125,
"f1_stderr": 0.013799233781790802,
"main_score": 0.38799596503026224
},
"zh-CN": {
"accuracy": 0.4624411566913248,
"accuracy_stderr": 0.01869309179104032,
"f1": 0.44367480561291905,
"f1_stderr": 0.01471127926363261,
"main_score": 0.4624411566913248
},
"zh-TW": {
"accuracy": 0.4230665770006724,
"accuracy_stderr": 0.015603332261143462,
"f1": 0.41964222328351397,
"f1_stderr": 0.013651788714198228,
"main_score": 0.4230665770006724
}
},
"validation": {
"af": {
"accuracy": 0.4180029513034924,
"accuracy_stderr": 0.010921152256864068,
"f1": 0.408564524920107,
"f1_stderr": 0.011580488915745207,
"main_score": 0.4180029513034924
},
"am": {
"accuracy": 0.22936546974913924,
"accuracy_stderr": 0.011915475401965652,
"f1": 0.22443454994948162,
"f1_stderr": 0.01293675359583084,
"main_score": 0.22936546974913924
},
"ar": {
"accuracy": 0.2941957697983276,
"accuracy_stderr": 0.013140667522280231,
"f1": 0.278025426878666,
"f1_stderr": 0.01146329357734503,
"main_score": 0.2941957697983276
},
"az": {
"accuracy": 0.3528283325135268,
"accuracy_stderr": 0.012346286276438762,
"f1": 0.3586288453850816,
"f1_stderr": 0.013126667211852334,
"main_score": 0.3528283325135268
},
"bn": {
"accuracy": 0.29242498770290204,
"accuracy_stderr": 0.012778098967926376,
"f1": 0.2743238187163509,
"f1_stderr": 0.007196357124921039,
"main_score": 0.29242498770290204
},
"cy": {
"accuracy": 0.4091982292179046,
"accuracy_stderr": 0.01750643809327536,
"f1": 0.3968229515847022,
"f1_stderr": 0.011600617097504318,
"main_score": 0.4091982292179046
},
"da": {
"accuracy": 0.4363994097393015,
"accuracy_stderr": 0.013211089954710703,
"f1": 0.41855164392134825,
"f1_stderr": 0.012090836177572879,
"main_score": 0.4363994097393015
},
"de": {
"accuracy": 0.4300049188391539,
"accuracy_stderr": 0.01189667797960137,
"f1": 0.40793611600487506,
"f1_stderr": 0.014239149206748571,
"main_score": 0.4300049188391539
},
"el": {
"accuracy": 0.3712739793408756,
"accuracy_stderr": 0.011689463686097046,
"f1": 0.3550955737747622,
"f1_stderr": 0.01024985130519696,
"main_score": 0.3712739793408756
},
"en": {
"accuracy": 0.6291687161829808,
"accuracy_stderr": 0.01686014188369135,
"f1": 0.6127498027362954,
"f1_stderr": 0.012061423584454146,
"main_score": 0.6291687161829808
},
"es": {
"accuracy": 0.43580914904082635,
"accuracy_stderr": 0.012224082295727747,
"f1": 0.42917002190317477,
"f1_stderr": 0.011405640175485206,
"main_score": 0.43580914904082635
},
"evaluation_time": 1881.4,
"fa": {
"accuracy": 0.3547466797835711,
"accuracy_stderr": 0.010601895712032338,
"f1": 0.33945762420706,
"f1_stderr": 0.009267694956350405,
"main_score": 0.3547466797835711
},
"fi": {
"accuracy": 0.40078701426463353,
"accuracy_stderr": 0.011417217998563597,
"f1": 0.3883449448052677,
"f1_stderr": 0.012122410583794865,
"main_score": 0.40078701426463353
},
"fr": {
"accuracy": 0.4429414658140679,
"accuracy_stderr": 0.015256729395188778,
"f1": 0.43394278610572457,
"f1_stderr": 0.014586034300029853,
"main_score": 0.4429414658140679
},
"he": {
"accuracy": 0.311460895228726,
"accuracy_stderr": 0.014779126801429157,
"f1": 0.2995793979884509,
"f1_stderr": 0.013615081888042758,
"main_score": 0.311460895228726
},
"hi": {
"accuracy": 0.25961633054599115,
"accuracy_stderr": 0.007878745912513887,
"f1": 0.25982460372695954,
"f1_stderr": 0.0071203448616418506,
"main_score": 0.25961633054599115
},
"hu": {
"accuracy": 0.3748155435317265,
"accuracy_stderr": 0.011499989267418064,
"f1": 0.36610577802929695,
"f1_stderr": 0.012185955975190215,
"main_score": 0.3748155435317265
},
"hy": {
"accuracy": 0.2815543531726513,
"accuracy_stderr": 0.013230122952822734,
"f1": 0.2770068958000932,
"f1_stderr": 0.01277048573808052,
"main_score": 0.2815543531726513
},
"id": {
"accuracy": 0.4424495818986719,
"accuracy_stderr": 0.011069574031270856,
"f1": 0.43510898494968553,
"f1_stderr": 0.0076463739176352115,
"main_score": 0.4424495818986719
},
"is": {
"accuracy": 0.40157402852926705,
"accuracy_stderr": 0.015005603921624816,
"f1": 0.3876853823428391,
"f1_stderr": 0.01201970040174986,
"main_score": 0.40157402852926705
},
"it": {
"accuracy": 0.456714215445155,
"accuracy_stderr": 0.015036288695625667,
"f1": 0.463502133111645,
"f1_stderr": 0.015187785561573016,
"main_score": 0.456714215445155
},
"ja": {
"accuracy": 0.4479094933595672,
"accuracy_stderr": 0.014653187990596124,
"f1": 0.4280973013012505,
"f1_stderr": 0.01655707191595756,
"main_score": 0.4479094933595672
},
"jv": {
"accuracy": 0.384505656665027,
"accuracy_stderr": 0.012104842420943921,
"f1": 0.380167978724446,
"f1_stderr": 0.008009104534033291,
"main_score": 0.384505656665027
},
"ka": {
"accuracy": 0.24835218888342353,
"accuracy_stderr": 0.013181112319925865,
"f1": 0.24709500138710574,
"f1_stderr": 0.01059101293402143,
"main_score": 0.24835218888342353
},
"km": {
"accuracy": 0.2742252828332513,
"accuracy_stderr": 0.011534127186569546,
"f1": 0.2602068523353439,
"f1_stderr": 0.012575174825235999,
"main_score": 0.2742252828332513
},
"kn": {
"accuracy": 0.2259222823413674,
"accuracy_stderr": 0.014722127495659583,
"f1": 0.21716530479138849,
"f1_stderr": 0.010961787165686507,
"main_score": 0.2259222823413674
},
"ko": {
"accuracy": 0.3673389080177078,
"accuracy_stderr": 0.013767126952905771,
"f1": 0.37221618799085243,
"f1_stderr": 0.014709606757115768,
"main_score": 0.3673389080177078
},
"lv": {
"accuracy": 0.4103295622233153,
"accuracy_stderr": 0.016045974090390387,
"f1": 0.40406596723582255,
"f1_stderr": 0.0069132258926177265,
"main_score": 0.4103295622233153
},
"ml": {
"accuracy": 0.24200688637481554,
"accuracy_stderr": 0.011347496474072452,
"f1": 0.23514331789309012,
"f1_stderr": 0.01204913680015678,
"main_score": 0.24200688637481554
},
"mn": {
"accuracy": 0.29070339399901624,
"accuracy_stderr": 0.01309926671195602,
"f1": 0.29527156314146025,
"f1_stderr": 0.010723196469553449,
"main_score": 0.29070339399901624
},
"ms": {
"accuracy": 0.42287260206591243,
"accuracy_stderr": 0.019597793502122884,
"f1": 0.41872839411817814,
"f1_stderr": 0.014555274766542817,
"main_score": 0.42287260206591243
},
"my": {
"accuracy": 0.24195769798327596,
"accuracy_stderr": 0.00958467079934785,
"f1": 0.23386230455157248,
"f1_stderr": 0.010002828286172527,
"main_score": 0.24195769798327596
},
"nb": {
"accuracy": 0.40196753566158383,
"accuracy_stderr": 0.014139930174690087,
"f1": 0.3918493283614314,
"f1_stderr": 0.013552558677777658,
"main_score": 0.40196753566158383
},
"nl": {
"accuracy": 0.4228726020659125,
"accuracy_stderr": 0.016822214166599285,
"f1": 0.4111008537872992,
"f1_stderr": 0.01409045918592202,
"main_score": 0.4228726020659125
},
"pl": {
"accuracy": 0.4145597638957206,
"accuracy_stderr": 0.020333079371071323,
"f1": 0.39761508941215074,
"f1_stderr": 0.015094892952504711,
"main_score": 0.4145597638957206
},
"pt": {
"accuracy": 0.45209050664043293,
"accuracy_stderr": 0.01835254418074865,
"f1": 0.4487810416996396,
"f1_stderr": 0.01196627715625292,
"main_score": 0.45209050664043293
},
"ro": {
"accuracy": 0.41559272011805215,
"accuracy_stderr": 0.008131571624077545,
"f1": 0.40039259678785666,
"f1_stderr": 0.00515152110757374,
"main_score": 0.41559272011805215
},
"ru": {
"accuracy": 0.35622233152975896,
"accuracy_stderr": 0.021799726881596905,
"f1": 0.34781156799018975,
"f1_stderr": 0.015093828682005378,
"main_score": 0.35622233152975896
},
"sl": {
"accuracy": 0.4014264633546484,
"accuracy_stderr": 0.012750044779015257,
"f1": 0.3891333558812916,
"f1_stderr": 0.011224536409706653,
"main_score": 0.4014264633546484
},
"sq": {
"accuracy": 0.43580914904082635,
"accuracy_stderr": 0.0145750441613665,
"f1": 0.4232383304393984,
"f1_stderr": 0.009598219357928728,
"main_score": 0.43580914904082635
},
"sv": {
"accuracy": 0.43433349729463844,
"accuracy_stderr": 0.012787089799130798,
"f1": 0.4239334259272507,
"f1_stderr": 0.010594787809411465,
"main_score": 0.43433349729463844
},
"sw": {
"accuracy": 0.4067879980324644,
"accuracy_stderr": 0.01684801143160469,
"f1": 0.3915460778697146,
"f1_stderr": 0.012876417288175717,
"main_score": 0.4067879980324644
},
"ta": {
"accuracy": 0.2374815543531726,
"accuracy_stderr": 0.007291836875634601,
"f1": 0.23306972516495983,
"f1_stderr": 0.006823162268817619,
"main_score": 0.2374815543531726
},
"te": {
"accuracy": 0.24535169699950812,
"accuracy_stderr": 0.010303295016847668,
"f1": 0.23951706003507978,
"f1_stderr": 0.012099393447774153,
"main_score": 0.24535169699950812
},
"th": {
"accuracy": 0.3489424495818987,
"accuracy_stderr": 0.011552886222641766,
"f1": 0.33677383997436106,
"f1_stderr": 0.006869691313087882,
"main_score": 0.3489424495818987
},
"tl": {
"accuracy": 0.4083620265617315,
"accuracy_stderr": 0.014225228406157971,
"f1": 0.3990401121375912,
"f1_stderr": 0.014254629179738524,
"main_score": 0.4083620265617315
},
"tr": {
"accuracy": 0.361829808165273,
"accuracy_stderr": 0.018463935842145066,
"f1": 0.3560425832290258,
"f1_stderr": 0.015144371708336025,
"main_score": 0.361829808165273
},
"ur": {
"accuracy": 0.25853418593212,
"accuracy_stderr": 0.011733674034744004,
"f1": 0.25059229515932524,
"f1_stderr": 0.010461858886336843,
"main_score": 0.25853418593212
},
"vi": {
"accuracy": 0.38180029513034924,
"accuracy_stderr": 0.014392627591048972,
"f1": 0.35555792018834453,
"f1_stderr": 0.014687028346239177,
"main_score": 0.38180029513034924
},
"zh-CN": {
"accuracy": 0.46173143138219375,
"accuracy_stderr": 0.01566177233818183,
"f1": 0.45269945997397354,
"f1_stderr": 0.012845282312811623,
"main_score": 0.46173143138219375
},
"zh-TW": {
"accuracy": 0.4192326610919824,
"accuracy_stderr": 0.01305866106572301,
"f1": 0.42394738901751217,
"f1_stderr": 0.012561702136094713,
"main_score": 0.4192326610919824
}
},
"dataset_version": null,
"mteb_version": "0.0.2"
}