SGPT-125M-weightedmean-msmarco-specb-bitfit / evaluation /mteb /MassiveScenarioClassification.json
Muennighoff's picture
Add MTEB evaluation
154c4e9
raw
history blame
26.2 kB
{
"test": {
"af": {
"accuracy": 0.43248150638870203,
"accuracy_stderr": 0.01914237726930163,
"f1": 0.40924230769590786,
"f1_stderr": 0.017594605316604825,
"main_score": 0.43248150638870203
},
"am": {
"accuracy": 0.2530262273032952,
"accuracy_stderr": 0.0171025727714938,
"f1": 0.24937105830264067,
"f1_stderr": 0.014061996186754674,
"main_score": 0.2530262273032952
},
"ar": {
"accuracy": 0.3207128446536651,
"accuracy_stderr": 0.013430382612278666,
"f1": 0.3180245816594883,
"f1_stderr": 0.01399700671221373,
"main_score": 0.3207128446536651
},
"az": {
"accuracy": 0.3668123739071957,
"accuracy_stderr": 0.017293355231396697,
"f1": 0.3637219042508338,
"f1_stderr": 0.013273690138493005,
"main_score": 0.3668123739071957
},
"bn": {
"accuracy": 0.2956624075319435,
"accuracy_stderr": 0.01956084843583552,
"f1": 0.2838604205636276,
"f1_stderr": 0.01649525524185898,
"main_score": 0.2956624075319435
},
"cy": {
"accuracy": 0.421049092131809,
"accuracy_stderr": 0.01842488912674414,
"f1": 0.38926150886991295,
"f1_stderr": 0.013736606444137845,
"main_score": 0.421049092131809
},
"da": {
"accuracy": 0.4544384667114997,
"accuracy_stderr": 0.018828807973369888,
"f1": 0.42578252395460003,
"f1_stderr": 0.01878751493281785,
"main_score": 0.4544384667114997
},
"de": {
"accuracy": 0.43211163416274373,
"accuracy_stderr": 0.015219386970045799,
"f1": 0.41044658583047894,
"f1_stderr": 0.012995583154456956,
"main_score": 0.43211163416274373
},
"el": {
"accuracy": 0.3650302622730329,
"accuracy_stderr": 0.014415725865475081,
"f1": 0.3449785095312759,
"f1_stderr": 0.015562024871571323,
"main_score": 0.3650302622730329
},
"en": {
"accuracy": 0.6973772696704774,
"accuracy_stderr": 0.011262031912799892,
"f1": 0.6921759502909044,
"f1_stderr": 0.013799487672182479,
"main_score": 0.6973772696704774
},
"es": {
"accuracy": 0.44078681909885675,
"accuracy_stderr": 0.018595701586506804,
"f1": 0.4305914426901129,
"f1_stderr": 0.016625685820030444,
"main_score": 0.44078681909885675
},
"evaluation_time": 1815.94,
"fa": {
"accuracy": 0.32612642905178213,
"accuracy_stderr": 0.012812264412745333,
"f1": 0.3202463177462754,
"f1_stderr": 0.012798802292501087,
"main_score": 0.32612642905178213
},
"fi": {
"accuracy": 0.40356422326832553,
"accuracy_stderr": 0.014078935277749945,
"f1": 0.3813642481807678,
"f1_stderr": 0.012692369056549692,
"main_score": 0.40356422326832553
},
"fr": {
"accuracy": 0.4506724949562878,
"accuracy_stderr": 0.015178175408214766,
"f1": 0.4319827608343738,
"f1_stderr": 0.014020710010605711,
"main_score": 0.4506724949562878
},
"he": {
"accuracy": 0.3217888365837256,
"accuracy_stderr": 0.02418084644697299,
"f1": 0.29979761884698775,
"f1_stderr": 0.020294571210800923,
"main_score": 0.3217888365837256
},
"hi": {
"accuracy": 0.26903160726294556,
"accuracy_stderr": 0.02497685106922395,
"f1": 0.25833010434083364,
"f1_stderr": 0.021329106099270956,
"main_score": 0.26903160726294556
},
"hu": {
"accuracy": 0.4037995965030262,
"accuracy_stderr": 0.020340860350433913,
"f1": 0.37931343552928826,
"f1_stderr": 0.01617066924214891,
"main_score": 0.4037995965030262
},
"hy": {
"accuracy": 0.28375924680564896,
"accuracy_stderr": 0.02244735882940363,
"f1": 0.2696255693013172,
"f1_stderr": 0.017311893310356832,
"main_score": 0.28375924680564896
},
"id": {
"accuracy": 0.44361129791526566,
"accuracy_stderr": 0.022926086894172665,
"f1": 0.4354445012295126,
"f1_stderr": 0.020284381381570574,
"main_score": 0.44361129791526566
},
"is": {
"accuracy": 0.39290517821116344,
"accuracy_stderr": 0.02370210834656748,
"f1": 0.3726982052174147,
"f1_stderr": 0.019988763380559,
"main_score": 0.39290517821116344
},
"it": {
"accuracy": 0.46469401479488903,
"accuracy_stderr": 0.018658554046491128,
"f1": 0.44060986162841564,
"f1_stderr": 0.017892736302635378,
"main_score": 0.46469401479488903
},
"ja": {
"accuracy": 0.46257565568258235,
"accuracy_stderr": 0.018244361807715694,
"f1": 0.4562513945675882,
"f1_stderr": 0.016350188403047698,
"main_score": 0.46257565568258235
},
"jv": {
"accuracy": 0.41126429051782115,
"accuracy_stderr": 0.018576843941362883,
"f1": 0.3954392378396527,
"f1_stderr": 0.011255653042251292,
"main_score": 0.41126429051782115
},
"ka": {
"accuracy": 0.24727639542703428,
"accuracy_stderr": 0.018326690606661058,
"f1": 0.23337743140804484,
"f1_stderr": 0.011940429119171217,
"main_score": 0.24727639542703428
},
"km": {
"accuracy": 0.2974108944182918,
"accuracy_stderr": 0.013625492982156541,
"f1": 0.2757087619008375,
"f1_stderr": 0.01055451354659993,
"main_score": 0.2974108944182918
},
"kn": {
"accuracy": 0.23850033624747816,
"accuracy_stderr": 0.014455552445217143,
"f1": 0.2286733484540032,
"f1_stderr": 0.010992305614270776,
"main_score": 0.23850033624747816
},
"ko": {
"accuracy": 0.3656691324815064,
"accuracy_stderr": 0.020280629432627,
"f1": 0.35504081677134564,
"f1_stderr": 0.019299833634584138,
"main_score": 0.3656691324815064
},
"lv": {
"accuracy": 0.40928043039677203,
"accuracy_stderr": 0.017879262085215602,
"f1": 0.3910858913121125,
"f1_stderr": 0.015622608383011384,
"main_score": 0.40928043039677203
},
"ml": {
"accuracy": 0.25527908540685945,
"accuracy_stderr": 0.012492668582100332,
"f1": 0.25333391622280477,
"f1_stderr": 0.011431795353486644,
"main_score": 0.25527908540685945
},
"mn": {
"accuracy": 0.29105581708137185,
"accuracy_stderr": 0.02289852732480194,
"f1": 0.28478235012692815,
"f1_stderr": 0.0211390543174164,
"main_score": 0.29105581708137185
},
"ms": {
"accuracy": 0.43786146603900467,
"accuracy_stderr": 0.02361491677556193,
"f1": 0.41964014392626703,
"f1_stderr": 0.016224233488107753,
"main_score": 0.43786146603900467
},
"my": {
"accuracy": 0.27269670477471414,
"accuracy_stderr": 0.017084548735816784,
"f1": 0.26228386764141853,
"f1_stderr": 0.01770505820877428,
"main_score": 0.27269670477471414
},
"nb": {
"accuracy": 0.3901815736381977,
"accuracy_stderr": 0.02356766226099208,
"f1": 0.37641949339321856,
"f1_stderr": 0.018189340920191487,
"main_score": 0.3901815736381977
},
"nl": {
"accuracy": 0.4535978480161399,
"accuracy_stderr": 0.016327230257174263,
"f1": 0.426851176096831,
"f1_stderr": 0.009526114688499471,
"main_score": 0.4535978480161399
},
"pl": {
"accuracy": 0.41893073301950234,
"accuracy_stderr": 0.020686982211902172,
"f1": 0.4088871064261502,
"f1_stderr": 0.019791742479992352,
"main_score": 0.41893073301950234
},
"pt": {
"accuracy": 0.45901143241425685,
"accuracy_stderr": 0.017537457772563485,
"f1": 0.44496942353920543,
"f1_stderr": 0.016580297609253208,
"main_score": 0.45901143241425685
},
"ro": {
"accuracy": 0.44115669132481505,
"accuracy_stderr": 0.017138728900302158,
"f1": 0.41953945105870616,
"f1_stderr": 0.01663750637309216,
"main_score": 0.44115669132481505
},
"ru": {
"accuracy": 0.3276395427034297,
"accuracy_stderr": 0.01520582329589761,
"f1": 0.31436372571600935,
"f1_stderr": 0.016822070079219324,
"main_score": 0.3276395427034297
},
"sl": {
"accuracy": 0.40504371217215873,
"accuracy_stderr": 0.01737927871109968,
"f1": 0.39322752749628165,
"f1_stderr": 0.016021377230910933,
"main_score": 0.40504371217215873
},
"sq": {
"accuracy": 0.4251849361129792,
"accuracy_stderr": 0.02703155777439191,
"f1": 0.41413929711846303,
"f1_stderr": 0.02203846614787482,
"main_score": 0.4251849361129792
},
"sv": {
"accuracy": 0.42293207800941496,
"accuracy_stderr": 0.02634507038010069,
"f1": 0.4050409536806683,
"f1_stderr": 0.021882375504727304,
"main_score": 0.42293207800941496
},
"sw": {
"accuracy": 0.42999327505043705,
"accuracy_stderr": 0.015164574873190428,
"f1": 0.4104541622497327,
"f1_stderr": 0.01416304033082228,
"main_score": 0.42999327505043705
},
"ta": {
"accuracy": 0.2832548755884331,
"accuracy_stderr": 0.017499864243874726,
"f1": 0.2727684199556187,
"f1_stderr": 0.017340547403638454,
"main_score": 0.2832548755884331
},
"te": {
"accuracy": 0.26593813046402154,
"accuracy_stderr": 0.021809193915635242,
"f1": 0.25483878616197586,
"f1_stderr": 0.019449647389494947,
"main_score": 0.26593813046402154
},
"th": {
"accuracy": 0.36788836583725626,
"accuracy_stderr": 0.01545089176597426,
"f1": 0.34603932909177687,
"f1_stderr": 0.016869984806312827,
"main_score": 0.36788836583725626
},
"tl": {
"accuracy": 0.425689307330195,
"accuracy_stderr": 0.015430059348496856,
"f1": 0.40924469309079825,
"f1_stderr": 0.008776200992571783,
"main_score": 0.425689307330195
},
"tr": {
"accuracy": 0.37094821788836585,
"accuracy_stderr": 0.022152967877636806,
"f1": 0.3794962882285716,
"f1_stderr": 0.0210046248379818,
"main_score": 0.37094821788836585
},
"ur": {
"accuracy": 0.2883658372562206,
"accuracy_stderr": 0.02184377077895051,
"f1": 0.2780655865551234,
"f1_stderr": 0.021979806560091308,
"main_score": 0.2883658372562206
},
"vi": {
"accuracy": 0.37357094821788833,
"accuracy_stderr": 0.015912003760378605,
"f1": 0.3750791896103816,
"f1_stderr": 0.013705906683792032,
"main_score": 0.37357094821788833
},
"zh-CN": {
"accuracy": 0.4937794216543375,
"accuracy_stderr": 0.014582638723526031,
"f1": 0.4720421153697707,
"f1_stderr": 0.014185512249352985,
"main_score": 0.4937794216543375
},
"zh-TW": {
"accuracy": 0.44421654337592476,
"accuracy_stderr": 0.027457834005907886,
"f1": 0.4434741861198931,
"f1_stderr": 0.02234941824008831,
"main_score": 0.44421654337592476
}
},
"validation": {
"af": {
"accuracy": 0.42297097884899165,
"accuracy_stderr": 0.01949058592896654,
"f1": 0.41228412552668264,
"f1_stderr": 0.02072529581458811,
"main_score": 0.42297097884899165
},
"am": {
"accuracy": 0.2450565666502705,
"accuracy_stderr": 0.008479256952982387,
"f1": 0.24806662079898306,
"f1_stderr": 0.008780275014937335,
"main_score": 0.2450565666502705
},
"ar": {
"accuracy": 0.3055582882439744,
"accuracy_stderr": 0.017670617309841773,
"f1": 0.3069995212499811,
"f1_stderr": 0.015362354242331443,
"main_score": 0.3055582882439744
},
"az": {
"accuracy": 0.3621249385145106,
"accuracy_stderr": 0.012152220847254775,
"f1": 0.36357765091456506,
"f1_stderr": 0.010139140710942176,
"main_score": 0.3621249385145106
},
"bn": {
"accuracy": 0.2971470732907034,
"accuracy_stderr": 0.022069587561896618,
"f1": 0.2882207128496783,
"f1_stderr": 0.01683479482879926,
"main_score": 0.2971470732907034
},
"cy": {
"accuracy": 0.4136251844564683,
"accuracy_stderr": 0.024426828453818297,
"f1": 0.39296855913661843,
"f1_stderr": 0.020973943849242817,
"main_score": 0.4136251844564683
},
"da": {
"accuracy": 0.43807181505164783,
"accuracy_stderr": 0.017869947001923518,
"f1": 0.4206737421170841,
"f1_stderr": 0.016872412523955146,
"main_score": 0.43807181505164783
},
"de": {
"accuracy": 0.43133300541072306,
"accuracy_stderr": 0.020020883880127186,
"f1": 0.416915841608262,
"f1_stderr": 0.021672651279716317,
"main_score": 0.43133300541072306
},
"el": {
"accuracy": 0.3577471716674865,
"accuracy_stderr": 0.01701527354158656,
"f1": 0.34927355878305144,
"f1_stderr": 0.016442978728160182,
"main_score": 0.3577471716674865
},
"en": {
"accuracy": 0.7017707820954255,
"accuracy_stderr": 0.016159041704017264,
"f1": 0.6966771799036044,
"f1_stderr": 0.01608645681525308,
"main_score": 0.7017707820954255
},
"es": {
"accuracy": 0.43846532218396456,
"accuracy_stderr": 0.02347394558102388,
"f1": 0.4343598552554334,
"f1_stderr": 0.0181860533015881,
"main_score": 0.43846532218396456
},
"evaluation_time": 1332.6,
"fa": {
"accuracy": 0.33084112149532713,
"accuracy_stderr": 0.011130497227832424,
"f1": 0.33401019078365096,
"f1_stderr": 0.013062402784378667,
"main_score": 0.33084112149532713
},
"fi": {
"accuracy": 0.39316281357599603,
"accuracy_stderr": 0.01045771116792211,
"f1": 0.3785817393037779,
"f1_stderr": 0.013819345424261865,
"main_score": 0.39316281357599603
},
"fr": {
"accuracy": 0.4451549434333497,
"accuracy_stderr": 0.015595125043219043,
"f1": 0.4346460544394509,
"f1_stderr": 0.013779384653720472,
"main_score": 0.4451549434333497
},
"he": {
"accuracy": 0.3149532710280374,
"accuracy_stderr": 0.023701506980689756,
"f1": 0.3011898277187477,
"f1_stderr": 0.02093714853708861,
"main_score": 0.3149532710280374
},
"hi": {
"accuracy": 0.26099360550909984,
"accuracy_stderr": 0.020137465573030665,
"f1": 0.2551702530489754,
"f1_stderr": 0.016256501108798407,
"main_score": 0.26099360550909984
},
"hu": {
"accuracy": 0.3777668470241023,
"accuracy_stderr": 0.01415737274034313,
"f1": 0.3636064864884589,
"f1_stderr": 0.009160637137956562,
"main_score": 0.3777668470241023
},
"hy": {
"accuracy": 0.28558780127889816,
"accuracy_stderr": 0.01877837310408312,
"f1": 0.2772579956450185,
"f1_stderr": 0.015424825177141353,
"main_score": 0.28558780127889816
},
"id": {
"accuracy": 0.4339399901623217,
"accuracy_stderr": 0.014721716629155198,
"f1": 0.4297916006449869,
"f1_stderr": 0.01354829266304612,
"main_score": 0.4339399901623217
},
"is": {
"accuracy": 0.3939498278406296,
"accuracy_stderr": 0.019989438522709065,
"f1": 0.37779417546607796,
"f1_stderr": 0.015353564518631574,
"main_score": 0.3939498278406296
},
"it": {
"accuracy": 0.45327102803738323,
"accuracy_stderr": 0.01699848617376043,
"f1": 0.44153892466033684,
"f1_stderr": 0.01882228199455719,
"main_score": 0.45327102803738323
},
"ja": {
"accuracy": 0.4578455484505656,
"accuracy_stderr": 0.020281847759727148,
"f1": 0.45729859048271465,
"f1_stderr": 0.01661016283738532,
"main_score": 0.4578455484505656
},
"jv": {
"accuracy": 0.39758976881455976,
"accuracy_stderr": 0.01707347422003101,
"f1": 0.39358025541686337,
"f1_stderr": 0.014149347886283038,
"main_score": 0.39758976881455976
},
"ka": {
"accuracy": 0.2424003935071323,
"accuracy_stderr": 0.019907387368651303,
"f1": 0.23256193506176298,
"f1_stderr": 0.015099183200210618,
"main_score": 0.2424003935071323
},
"km": {
"accuracy": 0.2993605509099853,
"accuracy_stderr": 0.013532083217551187,
"f1": 0.2809200454959333,
"f1_stderr": 0.009393641610664295,
"main_score": 0.2993605509099853
},
"kn": {
"accuracy": 0.23177570093457942,
"accuracy_stderr": 0.018427996145927942,
"f1": 0.22821426633751796,
"f1_stderr": 0.017679796969382704,
"main_score": 0.23177570093457942
},
"ko": {
"accuracy": 0.3586817511067388,
"accuracy_stderr": 0.021898282999062484,
"f1": 0.35579471530107626,
"f1_stderr": 0.019292875338196964,
"main_score": 0.3586817511067388
},
"lv": {
"accuracy": 0.4004426955238564,
"accuracy_stderr": 0.014973725234459603,
"f1": 0.387872887807314,
"f1_stderr": 0.015108580814192384,
"main_score": 0.4004426955238564
},
"ml": {
"accuracy": 0.24756517461878996,
"accuracy_stderr": 0.012515618686269558,
"f1": 0.24899937888159857,
"f1_stderr": 0.012555787929717375,
"main_score": 0.24756517461878996
},
"mn": {
"accuracy": 0.2843580914904083,
"accuracy_stderr": 0.019142375289924282,
"f1": 0.2816633850287075,
"f1_stderr": 0.01723360155398601,
"main_score": 0.2843580914904083
},
"ms": {
"accuracy": 0.43320216428922764,
"accuracy_stderr": 0.02159818527886489,
"f1": 0.4249245664682754,
"f1_stderr": 0.014510663777917886,
"main_score": 0.43320216428922764
},
"my": {
"accuracy": 0.26463354648302995,
"accuracy_stderr": 0.019322963284725016,
"f1": 0.2608329884167839,
"f1_stderr": 0.02115978672739665,
"main_score": 0.26463354648302995
},
"nb": {
"accuracy": 0.3869650762420069,
"accuracy_stderr": 0.024525284668151657,
"f1": 0.3814300772740455,
"f1_stderr": 0.020457575337737684,
"main_score": 0.3869650762420069
},
"nl": {
"accuracy": 0.4398425971470733,
"accuracy_stderr": 0.01625719128512154,
"f1": 0.42260609210168926,
"f1_stderr": 0.011560492888814633,
"main_score": 0.4398425971470733
},
"pl": {
"accuracy": 0.40083620265617326,
"accuracy_stderr": 0.022349561833830214,
"f1": 0.3991485290358368,
"f1_stderr": 0.01999404587448531,
"main_score": 0.40083620265617326
},
"pt": {
"accuracy": 0.45031972454500735,
"accuracy_stderr": 0.01750477954059943,
"f1": 0.4437398935443329,
"f1_stderr": 0.018517944459345348,
"main_score": 0.45031972454500735
},
"ro": {
"accuracy": 0.43812100344318744,
"accuracy_stderr": 0.021415938790220652,
"f1": 0.42362645820764067,
"f1_stderr": 0.018731210126253738,
"main_score": 0.43812100344318744
},
"ru": {
"accuracy": 0.33271028037383177,
"accuracy_stderr": 0.022300521711360505,
"f1": 0.32468290003396405,
"f1_stderr": 0.021982683919350685,
"main_score": 0.33271028037383177
},
"sl": {
"accuracy": 0.3973930152484014,
"accuracy_stderr": 0.018751618725414546,
"f1": 0.392100156522409,
"f1_stderr": 0.014838827675853575,
"main_score": 0.3973930152484014
},
"sq": {
"accuracy": 0.4194786030496803,
"accuracy_stderr": 0.02354660171070067,
"f1": 0.4169388707660075,
"f1_stderr": 0.017217415251225807,
"main_score": 0.4194786030496803
},
"sv": {
"accuracy": 0.42543039842597147,
"accuracy_stderr": 0.021562756889860635,
"f1": 0.4177387745760652,
"f1_stderr": 0.019505579966050932,
"main_score": 0.42543039842597147
},
"sw": {
"accuracy": 0.4136251844564683,
"accuracy_stderr": 0.012985083231251377,
"f1": 0.40084113718397746,
"f1_stderr": 0.00942359858438975,
"main_score": 0.4136251844564683
},
"ta": {
"accuracy": 0.2627643876045253,
"accuracy_stderr": 0.014139930174690075,
"f1": 0.25844837599995923,
"f1_stderr": 0.012773160336082992,
"main_score": 0.2627643876045253
},
"te": {
"accuracy": 0.2584358091490408,
"accuracy_stderr": 0.016944240742103198,
"f1": 0.2514435908623849,
"f1_stderr": 0.012722578409134445,
"main_score": 0.2584358091490408
},
"th": {
"accuracy": 0.35833743236596166,
"accuracy_stderr": 0.013268562919427823,
"f1": 0.3419105153327198,
"f1_stderr": 0.01289016362222908,
"main_score": 0.35833743236596166
},
"tl": {
"accuracy": 0.4170191834727004,
"accuracy_stderr": 0.018806183012845677,
"f1": 0.409812860893571,
"f1_stderr": 0.014175082125879828,
"main_score": 0.4170191834727004
},
"tr": {
"accuracy": 0.3648303000491884,
"accuracy_stderr": 0.024139475649706935,
"f1": 0.3753352910025087,
"f1_stderr": 0.020068175204176857,
"main_score": 0.3648303000491884
},
"ur": {
"accuracy": 0.27668470241023124,
"accuracy_stderr": 0.016621382716288085,
"f1": 0.2703604403224913,
"f1_stderr": 0.018845793770781712,
"main_score": 0.27668470241023124
},
"vi": {
"accuracy": 0.3775700934579439,
"accuracy_stderr": 0.017434075812833565,
"f1": 0.37914946223283774,
"f1_stderr": 0.016214172379444072,
"main_score": 0.3775700934579439
},
"zh-CN": {
"accuracy": 0.48888342351205105,
"accuracy_stderr": 0.014925171646575468,
"f1": 0.4748460692222126,
"f1_stderr": 0.017161662351747747,
"main_score": 0.48888342351205105
},
"zh-TW": {
"accuracy": 0.43866207575012295,
"accuracy_stderr": 0.02551147284957334,
"f1": 0.4429342869790666,
"f1_stderr": 0.020015637537278582,
"main_score": 0.43866207575012295
}
},
"dataset_version": null,
"mteb_version": "0.0.2"
}