PyTorch
English
Tevatron
phi3_v
vidore
custom_code
dse-phi3-docmatix-v1 / results.json
MrLight's picture
Rename Tevatron-DSE-Phi3-Docmatix-V1(ZeroShot)_metrics.json to results.json (#1)
b21002e verified
raw
history blame
16.5 kB
{"vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.596, "ndcg_at_3": 0.66243, "ndcg_at_5": 0.67792, "ndcg_at_10": 0.69366, "ndcg_at_20": 0.70384, "ndcg_at_100": 0.72371, "ndcg_at_1000": 0.73073, "map_at_1": 0.596, "map_at_3": 0.646, "map_at_5": 0.6545, "map_at_10": 0.66113, "map_at_20": 0.66396, "map_at_100": 0.66653, "map_at_1000": 0.6668, "recall_at_1": 0.596, "recall_at_3": 0.71, "recall_at_5": 0.748, "recall_at_10": 0.796, "recall_at_20": 0.836, "recall_at_100": 0.946, "recall_at_1000": 1.0, "precision_at_1": 0.596, "precision_at_3": 0.23667, "precision_at_5": 0.1496, "precision_at_10": 0.0796, "precision_at_20": 0.0418, "precision_at_100": 0.00946, "precision_at_1000": 0.001, "mrr_at_1": 0.596, "mrr_at_3": 0.6450000000000001, "mrr_at_5": 0.6536000000000001, "mrr_at_10": 0.6598706349206349, "mrr_at_20": 0.6629809069585384, "mrr_at_100": 0.6654406578751838, "mrr_at_1000": 0.6657332561707343, "naucs_at_1_max": 0.7246937364449195, "naucs_at_1_std": -0.017820313561876112, "naucs_at_1_diff1": 0.8557946305631229, "naucs_at_3_max": 0.6708040694549973, "naucs_at_3_std": -0.033660100439189634, "naucs_at_3_diff1": 0.7817912273224249, "naucs_at_5_max": 0.6890550781542456, "naucs_at_5_std": 0.014004542013625434, "naucs_at_5_diff1": 0.7861884519719491, "naucs_at_10_max": 0.6785984829852334, "naucs_at_10_std": 0.04659261955591343, "naucs_at_10_diff1": 0.7349848003384261, "naucs_at_20_max": 0.6409373425163786, "naucs_at_20_std": -0.00043195507667230873, "naucs_at_20_diff1": 0.7472222888819562, "naucs_at_100_max": 0.6469032057267319, "naucs_at_100_std": 0.2275651001141189, "naucs_at_100_diff1": 0.7340837569595693, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.32151, "ndcg_at_3": 0.37398, "ndcg_at_5": 0.39125, "ndcg_at_10": 0.42574, "ndcg_at_20": 0.44475, "ndcg_at_100": 0.47117, "ndcg_at_1000": 0.49931, "map_at_1": 0.32151, "map_at_3": 0.36142, "map_at_5": 0.37095, "map_at_10": 0.38522, "map_at_20": 0.39042, "map_at_100": 0.39383, "map_at_1000": 0.39489, "recall_at_1": 0.32151, "recall_at_3": 0.4102, "recall_at_5": 0.45233, "recall_at_10": 0.55876, "recall_at_20": 0.63415, "recall_at_100": 0.78049, "recall_at_1000": 1.0, "precision_at_1": 0.32151, "precision_at_3": 0.13673, "precision_at_5": 0.09047, "precision_at_10": 0.05588, "precision_at_20": 0.03171, "precision_at_100": 0.0078, "precision_at_1000": 0.001, "mrr_at_1": 0.3237250554323725, "mrr_at_3": 0.36326681448632664, "mrr_at_5": 0.37224685883222475, "mrr_at_10": 0.3867155879351, "mrr_at_20": 0.3917151389871112, "mrr_at_100": 0.39512710530526335, "mrr_at_1000": 0.39618168336845516, "naucs_at_1_max": 0.6256785840841385, "naucs_at_1_std": -0.03730509812435271, "naucs_at_1_diff1": 0.674216337409315, "naucs_at_3_max": 0.6511075508786901, "naucs_at_3_std": 0.04122671844455631, "naucs_at_3_diff1": 0.5984312115373969, "naucs_at_5_max": 0.6152561194605339, "naucs_at_5_std": 0.022961636177233475, "naucs_at_5_diff1": 0.5516923967880357, "naucs_at_10_max": 0.5333660881205546, "naucs_at_10_std": 0.011802201264660667, "naucs_at_10_diff1": 0.4507880429065512, "naucs_at_20_max": 0.5237151537088786, "naucs_at_20_std": 0.0427430228586714, "naucs_at_20_diff1": 0.41580033541654005, "naucs_at_100_max": 0.5429221150726199, "naucs_at_100_std": 0.17744076480594545, "naucs_at_100_diff1": 0.3453165107281001, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.72672, "ndcg_at_3": 0.78506, "ndcg_at_5": 0.799, "ndcg_at_10": 0.81584, "ndcg_at_20": 0.82187, "ndcg_at_100": 0.82913, "ndcg_at_1000": 0.83241, "map_at_1": 0.72672, "map_at_3": 0.77092, "map_at_5": 0.77851, "map_at_10": 0.78575, "map_at_20": 0.78734, "map_at_100": 0.78825, "map_at_1000": 0.7884, "recall_at_1": 0.72672, "recall_at_3": 0.82591, "recall_at_5": 0.86032, "recall_at_10": 0.91093, "recall_at_20": 0.93522, "recall_at_100": 0.97571, "recall_at_1000": 1.0, "precision_at_1": 0.72672, "precision_at_3": 0.2753, "precision_at_5": 0.17206, "precision_at_10": 0.09109, "precision_at_20": 0.04676, "precision_at_100": 0.00976, "precision_at_1000": 0.001, "mrr_at_1": 0.7267206477732794, "mrr_at_3": 0.7705802968960863, "mrr_at_5": 0.7790823211875841, "mrr_at_10": 0.7856114645588328, "mrr_at_20": 0.7871923873632456, "mrr_at_100": 0.7881034229308816, "mrr_at_1000": 0.788251594053257, "naucs_at_1_max": 0.47221709620160274, "naucs_at_1_std": -0.11250120422293125, "naucs_at_1_diff1": 0.7954957576839476, "naucs_at_3_max": 0.5824486017719426, "naucs_at_3_std": 0.036334804793392704, "naucs_at_3_diff1": 0.7435312107663508, "naucs_at_5_max": 0.5554459778521117, "naucs_at_5_std": 0.024058370809194176, "naucs_at_5_diff1": 0.7242014023783527, "naucs_at_10_max": 0.7404933356921699, "naucs_at_10_std": 0.275870180087623, "naucs_at_10_diff1": 0.7452029588069703, "naucs_at_20_max": 0.7501725856961272, "naucs_at_20_std": 0.3580509440415641, "naucs_at_20_diff1": 0.7443274144873248, "naucs_at_100_max": 0.810938425147745, "naucs_at_100_std": 0.466711749157426, "naucs_at_100_diff1": 0.6806539863630526, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.71429, "ndcg_at_3": 0.78444, "ndcg_at_5": 0.80534, "ndcg_at_10": 0.81719, "ndcg_at_20": 0.82173, "ndcg_at_100": 0.83456, "ndcg_at_1000": 0.83456, "map_at_1": 0.71429, "map_at_3": 0.76786, "map_at_5": 0.77964, "map_at_10": 0.78471, "map_at_20": 0.78597, "map_at_100": 0.78807, "map_at_1000": 0.78807, "recall_at_1": 0.71429, "recall_at_3": 0.83214, "recall_at_5": 0.88214, "recall_at_10": 0.91786, "recall_at_20": 0.93571, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.71429, "precision_at_3": 0.27738, "precision_at_5": 0.17643, "precision_at_10": 0.09179, "precision_at_20": 0.04679, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7071428571428572, "mrr_at_3": 0.7630952380952382, "mrr_at_5": 0.777202380952381, "mrr_at_10": 0.7805994897959184, "mrr_at_20": 0.7823797928262214, "mrr_at_100": 0.7843190954564946, "mrr_at_1000": 0.7843190954564946, "naucs_at_1_max": 0.7238742964352718, "naucs_at_1_std": 0.39114178504422414, "naucs_at_1_diff1": 0.8626172607879926, "naucs_at_3_max": 0.7673721081443874, "naucs_at_3_std": 0.4297879480672015, "naucs_at_3_diff1": 0.8080416133565446, "naucs_at_5_max": 0.7627514938082737, "naucs_at_5_std": 0.42317351268654524, "naucs_at_5_diff1": 0.8074647114857261, "naucs_at_10_max": 0.7285551901920189, "naucs_at_10_std": 0.5494052693541182, "naucs_at_10_diff1": 0.8134616165306697, "naucs_at_20_max": 0.7943510737628384, "naucs_at_20_std": 0.5192706712314574, "naucs_at_20_diff1": 0.8638344226579525, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/tatdqa_test": {"ndcg_at_1": 0.31592, "ndcg_at_3": 0.41472, "ndcg_at_5": 0.44938, "ndcg_at_10": 0.48243, "ndcg_at_20": 0.50469, "ndcg_at_100": 0.53675, "ndcg_at_1000": 0.54567, "map_at_1": 0.31592, "map_at_3": 0.38994, "map_at_5": 0.4091, "map_at_10": 0.42277, "map_at_20": 0.42903, "map_at_100": 0.43325, "map_at_1000": 0.43371, "recall_at_1": 0.31592, "recall_at_3": 0.48663, "recall_at_5": 0.57108, "recall_at_10": 0.67315, "recall_at_20": 0.76002, "recall_at_100": 0.93621, "recall_at_1000": 1.0, "precision_at_1": 0.31592, "precision_at_3": 0.16221, "precision_at_5": 0.11422, "precision_at_10": 0.06731, "precision_at_20": 0.038, "precision_at_100": 0.00936, "precision_at_1000": 0.001, "mrr_at_1": 0.31044957472660994, "mrr_at_3": 0.3864925070878902, "mrr_at_5": 0.4059943296881336, "mrr_at_10": 0.41956546895793634, "mrr_at_20": 0.4260674130893106, "mrr_at_100": 0.4301655843391273, "mrr_at_1000": 0.4306392409533613, "naucs_at_1_max": 0.2416151418019763, "naucs_at_1_std": -0.07037021373258057, "naucs_at_1_diff1": 0.5866693865534238, "naucs_at_3_max": 0.23762632124514996, "naucs_at_3_std": -0.03389778406712713, "naucs_at_3_diff1": 0.4642014043376365, "naucs_at_5_max": 0.25382411919899783, "naucs_at_5_std": -0.02506506030747841, "naucs_at_5_diff1": 0.46315795574545704, "naucs_at_10_max": 0.27167605810586737, "naucs_at_10_std": 0.006659991744018172, "naucs_at_10_diff1": 0.44121843963199747, "naucs_at_20_max": 0.26668806904438636, "naucs_at_20_std": -0.0005063482380820688, "naucs_at_20_diff1": 0.4427114530463436, "naucs_at_100_max": 0.3584412512381841, "naucs_at_100_std": 0.09348770451691558, "naucs_at_100_diff1": 0.4635313221579285, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/shiftproject_test": {"ndcg_at_1": 0.41, "ndcg_at_3": 0.58226, "ndcg_at_5": 0.61153, "ndcg_at_10": 0.64147, "ndcg_at_20": 0.6569, "ndcg_at_100": 0.66812, "ndcg_at_1000": 0.66933, "map_at_1": 0.41, "map_at_3": 0.53833, "map_at_5": 0.55483, "map_at_10": 0.56768, "map_at_20": 0.57206, "map_at_100": 0.57363, "map_at_1000": 0.57367, "recall_at_1": 0.41, "recall_at_3": 0.71, "recall_at_5": 0.78, "recall_at_10": 0.87, "recall_at_20": 0.93, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.41, "precision_at_3": 0.23667, "precision_at_5": 0.156, "precision_at_10": 0.087, "precision_at_20": 0.0465, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.42, "mrr_at_3": 0.5516666666666667, "mrr_at_5": 0.5631666666666668, "mrr_at_10": 0.5782619047619049, "mrr_at_20": 0.5809463402094983, "mrr_at_100": 0.5825439643036239, "mrr_at_1000": 0.5825765375935262, "naucs_at_1_max": 0.10389191004559734, "naucs_at_1_std": -0.16301329165428774, "naucs_at_1_diff1": 0.5778188253226034, "naucs_at_3_max": 0.33660100439189855, "naucs_at_3_std": 0.03793339881029641, "naucs_at_3_diff1": 0.4029983507217912, "naucs_at_5_max": 0.415936384290815, "naucs_at_5_std": 0.01599666156628259, "naucs_at_5_diff1": 0.43190986228960887, "naucs_at_10_max": 0.5307339109827454, "naucs_at_10_std": 0.23928015996445295, "naucs_at_10_diff1": 0.4889654150929423, "naucs_at_20_max": 0.32186207816459933, "naucs_at_20_std": -0.1321195144724533, "naucs_at_20_diff1": 0.621915432839802, "naucs_at_100_max": 0.35807656395892007, "naucs_at_100_std": 0.35807656395892007, "naucs_at_100_diff1": 0.7222222222222041, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.91, "ndcg_at_3": 0.95286, "ndcg_at_5": 0.96059, "ndcg_at_10": 0.96059, "ndcg_at_20": 0.96059, "ndcg_at_100": 0.96059, "ndcg_at_1000": 0.96059, "map_at_1": 0.91, "map_at_3": 0.94333, "map_at_5": 0.94733, "map_at_10": 0.94733, "map_at_20": 0.94733, "map_at_100": 0.94733, "map_at_1000": 0.94733, "recall_at_1": 0.91, "recall_at_3": 0.98, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.32667, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9483333333333335, "mrr_at_5": 0.9523333333333334, "mrr_at_10": 0.9523333333333334, "mrr_at_20": 0.9523333333333334, "mrr_at_100": 0.9523333333333334, "mrr_at_1000": 0.9523333333333334, "naucs_at_1_max": 0.6255835667600368, "naucs_at_1_std": -0.007677144932046806, "naucs_at_1_diff1": 0.8524224504616646, "naucs_at_3_max": 0.7770774976657261, "naucs_at_3_std": -0.07586367880486825, "naucs_at_3_diff1": 0.8611111111111119, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.82, "ndcg_at_3": 0.86917, "ndcg_at_5": 0.89026, "ndcg_at_10": 0.90572, "ndcg_at_20": 0.90572, "ndcg_at_100": 0.90572, "ndcg_at_1000": 0.90572, "map_at_1": 0.82, "map_at_3": 0.85833, "map_at_5": 0.87033, "map_at_10": 0.8763, "map_at_20": 0.8763, "map_at_100": 0.8763, "map_at_1000": 0.8763, "recall_at_1": 0.82, "recall_at_3": 0.9, "recall_at_5": 0.95, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.82, "precision_at_3": 0.3, "precision_at_5": 0.19, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.83, "mrr_at_3": 0.8633333333333334, "mrr_at_5": 0.8753333333333333, "mrr_at_10": 0.8816507936507937, "mrr_at_20": 0.8816507936507937, "mrr_at_100": 0.8816507936507937, "mrr_at_1000": 0.8816507936507937, "naucs_at_1_max": 0.40053970701619107, "naucs_at_1_std": -0.2971968278444761, "naucs_at_1_diff1": 0.9158772992620331, "naucs_at_3_max": 0.5719421101774032, "naucs_at_3_std": -0.3404761904761918, "naucs_at_3_diff1": 0.8459383753501405, "naucs_at_5_max": 0.8291316526610654, "naucs_at_5_std": -0.37917833800185535, "naucs_at_5_diff1": 0.8029878618113909, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.79, "ndcg_at_3": 0.88178, "ndcg_at_5": 0.8904, "ndcg_at_10": 0.89396, "ndcg_at_20": 0.89659, "ndcg_at_100": 0.89837, "ndcg_at_1000": 0.89837, "map_at_1": 0.79, "map_at_3": 0.85833, "map_at_5": 0.86333, "map_at_10": 0.865, "map_at_20": 0.86577, "map_at_100": 0.86598, "map_at_1000": 0.86598, "recall_at_1": 0.79, "recall_at_3": 0.95, "recall_at_5": 0.97, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.79, "precision_at_3": 0.31667, "precision_at_5": 0.194, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8, "mrr_at_3": 0.8633333333333333, "mrr_at_5": 0.8683333333333333, "mrr_at_10": 0.87, "mrr_at_20": 0.870909090909091, "mrr_at_100": 0.8711264822134388, "mrr_at_1000": 0.8711264822134388, "naucs_at_1_max": 0.3062495491379518, "naucs_at_1_std": -0.24070021881838138, "naucs_at_1_diff1": 0.8087623536201219, "naucs_at_3_max": 0.6540616246498568, "naucs_at_3_std": -0.46423902894490915, "naucs_at_3_diff1": 0.7428571428571419, "naucs_at_5_max": 0.6374105197634568, "naucs_at_5_std": -0.4344849050731363, "naucs_at_5_diff1": 0.6640211640211615, "naucs_at_10_max": 0.6790382819794609, "naucs_at_10_std": 0.21825396825397442, "naucs_at_10_diff1": 0.4960317460317504, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.92786, "ndcg_at_5": 0.93603, "ndcg_at_10": 0.93959, "ndcg_at_20": 0.93959, "ndcg_at_100": 0.93959, "ndcg_at_1000": 0.93959, "map_at_1": 0.87, "map_at_3": 0.91333, "map_at_5": 0.91783, "map_at_10": 0.9195, "map_at_20": 0.9195, "map_at_100": 0.9195, "map_at_1000": 0.9195, "recall_at_1": 0.87, "recall_at_3": 0.97, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.32333, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.87, "mrr_at_3": 0.9133333333333333, "mrr_at_5": 0.9178333333333333, "mrr_at_10": 0.9194999999999999, "mrr_at_20": 0.9194999999999999, "mrr_at_100": 0.9194999999999999, "mrr_at_1000": 0.9194999999999999, "naucs_at_1_max": 0.4242760867955278, "naucs_at_1_std": -0.4802266163074866, "naucs_at_1_diff1": 0.9115011478930612, "naucs_at_3_max": 0.1956115779645167, "naucs_at_3_std": -1.0210084033613431, "naucs_at_3_diff1": 0.9128540305010931, "naucs_at_5_max": 0.8692810457516413, "naucs_at_5_std": -1.1517273576097316, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}