diff --git a/eval/beir.json b/evaluation/beir/beir.json similarity index 100% rename from eval/beir.json rename to evaluation/beir/beir.json diff --git a/evaluation/mteb/AmazonCounterfactualClassification.json b/evaluation/mteb/AmazonCounterfactualClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..cfe3bceb42b94028d89657e4a3192b4578727d09 --- /dev/null +++ b/evaluation/mteb/AmazonCounterfactualClassification.json @@ -0,0 +1,16 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.6756716417910448, + "accuracy_stderr": 0.035149491395717075, + "ap": 0.3075574629595259, + "ap_stderr": 0.022378514563472136, + "f1": 0.6180512130185866, + "f1_stderr": 0.027752667739340424, + "main_score": 0.6756716417910448 + }, + "evaluation_time": 33.75 + } +} \ No newline at end of file diff --git a/evaluation/mteb/AmazonPolarityClassification.json b/evaluation/mteb/AmazonPolarityClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..e2585739bf2f82a9c0b25f03af8eaac12f8fed1c --- /dev/null +++ b/evaluation/mteb/AmazonPolarityClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.71439575, + "accuracy_stderr": 0.04118535930779893, + "ap": 0.6591341330532453, + "ap_stderr": 0.039567766080541335, + "evaluation_time": 15193.17, + "f1": 0.7090561852619555, + "f1_stderr": 0.04610604328985526, + "main_score": 0.71439575 + } +} \ No newline at end of file diff --git a/evaluation/mteb/AmazonReviewsClassification.json b/evaluation/mteb/AmazonReviewsClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..85bd14faa33e4c7fe1ce1f1e38dd78a0b3abf43a --- /dev/null +++ b/evaluation/mteb/AmazonReviewsClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.35748, + "accuracy_stderr": 0.017313624692709504, + "f1": 0.35485762871863474, + "f1_stderr": 0.016676501915976406, + "main_score": 0.35748 + }, + "evaluation_time": 125.24 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArguAna.json b/evaluation/mteb/ArguAna.json new file mode 100644 index 0000000000000000000000000000000000000000..a2824a183e892b9b8cf72524fd196fb10a627717 --- /dev/null +++ b/evaluation/mteb/ArguAna.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 254.16, + "map_at_1": 0.2596, + "map_at_10": 0.41619, + "map_at_100": 0.42673, + "map_at_1000": 0.42684, + "map_at_3": 0.36569, + "map_at_5": 0.39397, + "mrr_at_1": 0.26316, + "mrr_at_10": 0.41772, + "mrr_at_100": 0.4282, + "mrr_at_1000": 0.4283, + "mrr_at_3": 0.36724, + "mrr_at_5": 0.39529, + "ndcg_at_1": 0.2596, + "ndcg_at_10": 0.50491, + "ndcg_at_100": 0.54865, + "ndcg_at_1000": 0.55107, + "ndcg_at_3": 0.40053, + "ndcg_at_5": 0.45134, + "precision_at_1": 0.2596, + "precision_at_10": 0.07895, + "precision_at_100": 0.00978, + "precision_at_1000": 0.001, + "precision_at_3": 0.16714, + "precision_at_5": 0.12489, + "recall_at_1": 0.2596, + "recall_at_10": 0.78947, + "recall_at_100": 0.97795, + "recall_at_1000": 0.99644, + "recall_at_3": 0.50142, + "recall_at_5": 0.62447 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArxivClusteringP2P.json b/evaluation/mteb/ArxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..cd096990cef161aa17054df8cabf9becce77e38a --- /dev/null +++ b/evaluation/mteb/ArxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 57365.88, + "v_measure": 0.44721257146422017, + "v_measure_std": 0.135218958073186 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArxivClusteringS2S.json b/evaluation/mteb/ArxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..51f764eeea3ef58e6a7d73c62d0eb28deb0d81df --- /dev/null +++ b/evaluation/mteb/ArxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 6983.79, + "v_measure": 0.35081451519142065, + "v_measure_std": 0.14064701566219873 + } +} \ No newline at end of file diff --git a/evaluation/mteb/AskUbuntuDupQuestions.json b/evaluation/mteb/AskUbuntuDupQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..990194bc55610a6fdf64020971add9ade47a9ff7 --- /dev/null +++ b/evaluation/mteb/AskUbuntuDupQuestions.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 56.03, + "map": 0.5963466199039206, + "mrr": 0.736813525040672 + } +} \ No newline at end of file diff --git a/evaluation/mteb/BIOSSES.json b/evaluation/mteb/BIOSSES.json new file mode 100644 index 0000000000000000000000000000000000000000..c2ba982a611fe6612c0bb04889e0b5b951d56751 --- /dev/null +++ b/evaluation/mteb/BIOSSES.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.8742754550496836, + "spearman": 0.8484289705838665 + }, + "euclidean": { + "pearson": 0.8559331970450859, + "spearman": 0.858525586184271 + }, + "evaluation_time": 6.0, + "manhattan": { + "pearson": 0.8541233134466698, + "spearman": 0.8552303303767403 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/Banking77Classification.json b/evaluation/mteb/Banking77Classification.json new file mode 100644 index 0000000000000000000000000000000000000000..811dec4111953f3bffdcf592060ab43091f07e99 --- /dev/null +++ b/evaluation/mteb/Banking77Classification.json @@ -0,0 +1,12 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.8321753246753246, + "accuracy_stderr": 0.008047037149251285, + "evaluation_time": 109.45, + "f1": 0.8315394543120915, + "f1_stderr": 0.008443343664861741, + "main_score": 0.8321753246753246 + } +} \ No newline at end of file diff --git a/evaluation/mteb/BiorxivClusteringP2P.json b/evaluation/mteb/BiorxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..245e90844924cfccc6d11149e0ca2e988ebbaf06 --- /dev/null +++ b/evaluation/mteb/BiorxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 6657.01, + "v_measure": 0.3441414219680629, + "v_measure_std": 0.010243021348181702 + } +} \ No newline at end of file diff --git a/evaluation/mteb/BiorxivClusteringS2S.json b/evaluation/mteb/BiorxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..7b3e91f99d13cc4b440e1eded72001e72312cece --- /dev/null +++ b/evaluation/mteb/BiorxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 843.62, + "v_measure": 0.30533275862270026, + "v_measure_std": 0.011135486000086606 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackAndroidRetrieval.json b/evaluation/mteb/CQADupstackAndroidRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..2776b086f94f1c16f0b63060102a8eb69a0f3715 --- /dev/null +++ b/evaluation/mteb/CQADupstackAndroidRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1118.91, + "map_at_1": 0.30809, + "map_at_10": 0.40617, + "map_at_100": 0.41895, + "map_at_1000": 0.42025, + "map_at_3": 0.37, + "map_at_5": 0.38993, + "mrr_at_1": 0.37482, + "mrr_at_10": 0.46497, + "mrr_at_100": 0.47144, + "mrr_at_1000": 0.47189, + "mrr_at_3": 0.43705, + "mrr_at_5": 0.45193, + "ndcg_at_1": 0.37482, + "ndcg_at_10": 0.46688, + "ndcg_at_100": 0.51726, + "ndcg_at_1000": 0.53825, + "ndcg_at_3": 0.41242, + "ndcg_at_5": 0.43657, + "precision_at_1": 0.37482, + "precision_at_10": 0.08827, + "precision_at_100": 0.01393, + "precision_at_1000": 0.00186, + "precision_at_3": 0.19361, + "precision_at_5": 0.14106, + "recall_at_1": 0.30809, + "recall_at_10": 0.5847, + "recall_at_100": 0.80519, + "recall_at_1000": 0.93809, + "recall_at_3": 0.42462, + "recall_at_5": 0.49385 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackEnglishRetrieval.json b/evaluation/mteb/CQADupstackEnglishRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..c2d40e2a879489f8bfc00ec126080a6a8fbbae19 --- /dev/null +++ b/evaluation/mteb/CQADupstackEnglishRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1732.23, + "map_at_1": 0.26962, + "map_at_10": 0.3693, + "map_at_100": 0.38102, + "map_at_1000": 0.3822, + "map_at_3": 0.34065, + "map_at_5": 0.3572, + "mrr_at_1": 0.33567, + "mrr_at_10": 0.42269, + "mrr_at_100": 0.4299, + "mrr_at_1000": 0.43033, + "mrr_at_3": 0.40064, + "mrr_at_5": 0.41258, + "ndcg_at_1": 0.33567, + "ndcg_at_10": 0.42405, + "ndcg_at_100": 0.46847, + "ndcg_at_1000": 0.48951, + "ndcg_at_3": 0.38312, + "ndcg_at_5": 0.40242, + "precision_at_1": 0.33567, + "precision_at_10": 0.08032, + "precision_at_100": 0.01295, + "precision_at_1000": 0.00176, + "precision_at_3": 0.18662, + "precision_at_5": 0.13299, + "recall_at_1": 0.26962, + "recall_at_10": 0.52489, + "recall_at_100": 0.71635, + "recall_at_1000": 0.85141, + "recall_at_3": 0.4028, + "recall_at_5": 0.45757 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackGamingRetrieval.json b/evaluation/mteb/CQADupstackGamingRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..d96866446a835a71ef084e5f78a2c39b37d11b9d --- /dev/null +++ b/evaluation/mteb/CQADupstackGamingRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1830.25, + "map_at_1": 0.36318, + "map_at_10": 0.4797, + "map_at_100": 0.49003, + "map_at_1000": 0.49066, + "map_at_3": 0.45031, + "map_at_5": 0.46633, + "mrr_at_1": 0.41505, + "mrr_at_10": 0.51431, + "mrr_at_100": 0.52129, + "mrr_at_1000": 0.52161, + "mrr_at_3": 0.48934, + "mrr_at_5": 0.5042, + "ndcg_at_1": 0.41505, + "ndcg_at_10": 0.53676, + "ndcg_at_100": 0.57867, + "ndcg_at_1000": 0.59166, + "ndcg_at_3": 0.48516, + "ndcg_at_5": 0.50984, + "precision_at_1": 0.41505, + "precision_at_10": 0.08608, + "precision_at_100": 0.01156, + "precision_at_1000": 0.00133, + "precision_at_3": 0.21463, + "precision_at_5": 0.14721, + "recall_at_1": 0.36318, + "recall_at_10": 0.67066, + "recall_at_100": 0.8534, + "recall_at_1000": 0.94491, + "recall_at_3": 0.53216, + "recall_at_5": 0.59214 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackGisRetrieval.json b/evaluation/mteb/CQADupstackGisRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..5fc0bae1c8d92c2788476f6dbaecfb52a126d295 --- /dev/null +++ b/evaluation/mteb/CQADupstackGisRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2522.93, + "map_at_1": 0.22167, + "map_at_10": 0.29544, + "map_at_100": 0.30579, + "map_at_1000": 0.3067, + "map_at_3": 0.26982, + "map_at_5": 0.28474, + "mrr_at_1": 0.24068, + "mrr_at_10": 0.31237, + "mrr_at_100": 0.32222, + "mrr_at_1000": 0.32292, + "mrr_at_3": 0.28776, + "mrr_at_5": 0.30234, + "ndcg_at_1": 0.24068, + "ndcg_at_10": 0.33973, + "ndcg_at_100": 0.39135, + "ndcg_at_1000": 0.41444, + "ndcg_at_3": 0.29018, + "ndcg_at_5": 0.31559, + "precision_at_1": 0.24068, + "precision_at_10": 0.05299, + "precision_at_100": 0.00823, + "precision_at_1000": 0.00106, + "precision_at_3": 0.12166, + "precision_at_5": 0.08768, + "recall_at_1": 0.22167, + "recall_at_10": 0.46115, + "recall_at_100": 0.69867, + "recall_at_1000": 0.87234, + "recall_at_3": 0.32798, + "recall_at_5": 0.38951 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackMathematicaRetrieval.json b/evaluation/mteb/CQADupstackMathematicaRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..38513be94b928105034babb7ec300576c0459e3a --- /dev/null +++ b/evaluation/mteb/CQADupstackMathematicaRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1350.85, + "map_at_1": 0.12033, + "map_at_10": 0.19314, + "map_at_100": 0.20562, + "map_at_1000": 0.20695, + "map_at_3": 0.16946, + "map_at_5": 0.18077, + "mrr_at_1": 0.14801, + "mrr_at_10": 0.2274, + "mrr_at_100": 0.23876, + "mrr_at_1000": 0.23949, + "mrr_at_3": 0.20211, + "mrr_at_5": 0.21573, + "ndcg_at_1": 0.14801, + "ndcg_at_10": 0.24038, + "ndcg_at_100": 0.30186, + "ndcg_at_1000": 0.33321, + "ndcg_at_3": 0.19431, + "ndcg_at_5": 0.2134, + "precision_at_1": 0.14801, + "precision_at_10": 0.04776, + "precision_at_100": 0.00897, + "precision_at_1000": 0.00133, + "precision_at_3": 0.0966, + "precision_at_5": 0.07239, + "recall_at_1": 0.12033, + "recall_at_10": 0.35098, + "recall_at_100": 0.62175, + "recall_at_1000": 0.84171, + "recall_at_3": 0.2261, + "recall_at_5": 0.27279 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackPhysicsRetrieval.json b/evaluation/mteb/CQADupstackPhysicsRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..c42036870fecbcdf9d6b4a5366b8e56e54c684b0 --- /dev/null +++ b/evaluation/mteb/CQADupstackPhysicsRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2467.52, + "map_at_1": 0.26651, + "map_at_10": 0.36901, + "map_at_100": 0.38249, + "map_at_1000": 0.38361, + "map_at_3": 0.33891, + "map_at_5": 0.35439, + "mrr_at_1": 0.32724, + "mrr_at_10": 0.42504, + "mrr_at_100": 0.43392, + "mrr_at_1000": 0.43436, + "mrr_at_3": 0.3999, + "mrr_at_5": 0.41347, + "ndcg_at_1": 0.32724, + "ndcg_at_10": 0.43007, + "ndcg_at_100": 0.48601, + "ndcg_at_1000": 0.50697, + "ndcg_at_3": 0.3799, + "ndcg_at_5": 0.40084, + "precision_at_1": 0.32724, + "precision_at_10": 0.07873, + "precision_at_100": 0.01247, + "precision_at_1000": 0.00162, + "precision_at_3": 0.18062, + "precision_at_5": 0.12666, + "recall_at_1": 0.26651, + "recall_at_10": 0.55674, + "recall_at_100": 0.78904, + "recall_at_1000": 0.92558, + "recall_at_3": 0.4136, + "recall_at_5": 0.46984 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackProgrammersRetrieval.json b/evaluation/mteb/CQADupstackProgrammersRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..1828ced17a8904acbe165604fc5d303e15a947b7 --- /dev/null +++ b/evaluation/mteb/CQADupstackProgrammersRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2301.71, + "map_at_1": 0.22589, + "map_at_10": 0.32244, + "map_at_100": 0.3346, + "map_at_1000": 0.33593, + "map_at_3": 0.2921, + "map_at_5": 0.3102, + "mrr_at_1": 0.28425, + "mrr_at_10": 0.37282, + "mrr_at_100": 0.38187, + "mrr_at_1000": 0.38248, + "mrr_at_3": 0.34684, + "mrr_at_5": 0.36123, + "ndcg_at_1": 0.28425, + "ndcg_at_10": 0.37942, + "ndcg_at_100": 0.43443, + "ndcg_at_1000": 0.45996, + "ndcg_at_3": 0.32874, + "ndcg_at_5": 0.35325, + "precision_at_1": 0.28425, + "precision_at_10": 0.071, + "precision_at_100": 0.01166, + "precision_at_1000": 0.00158, + "precision_at_3": 0.1602, + "precision_at_5": 0.11644, + "recall_at_1": 0.22589, + "recall_at_10": 0.5004, + "recall_at_100": 0.73973, + "recall_at_1000": 0.91128, + "recall_at_3": 0.35883, + "recall_at_5": 0.42188 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackRetrieval.json b/evaluation/mteb/CQADupstackRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..ee1d430b6369bb156825d99678ee8cf142955695 --- /dev/null +++ b/evaluation/mteb/CQADupstackRetrieval.json @@ -0,0 +1 @@ +{"dataset_version": null, "mteb_version": "0.0.2", "test": {"evaluation_time": 3522.66, "map_at_1": 0.23190833333333336, "map_at_10": 0.31504916666666666, "map_at_100": 0.32649083333333334, "map_at_1000": 0.3277075, "map_at_3": 0.2882575, "map_at_5": 0.302755, "mrr_at_1": 0.27427499999999994, "mrr_at_10": 0.35364833333333334, "mrr_at_100": 0.36234416666666663, "mrr_at_1000": 0.36297583333333333, "mrr_at_3": 0.3297966666666667, "mrr_at_5": 0.34294583333333334, "ndcg_at_1": 0.27427499999999994, "ndcg_at_10": 0.3653358333333333, "ndcg_at_100": 0.4164508333333333, "ndcg_at_1000": 0.4414499999999999, "ndcg_at_3": 0.3188908333333333, "ndcg_at_5": 0.33984333333333333, "precision_at_1": 0.27427499999999994, "precision_at_10": 0.06481083333333333, "precision_at_100": 0.010610833333333333, "precision_at_1000": 0.0014691666666666666, "precision_at_3": 0.1465675, "precision_at_5": 0.10493583333333333, "recall_at_1": 0.23190833333333336, "recall_at_10": 0.4765175, "recall_at_100": 0.7041016666666666, "recall_at_1000": 0.8782708333333332, "recall_at_3": 0.34637583333333327, "recall_at_5": 0.4005008333333333}} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackStatsRetrieval.json b/evaluation/mteb/CQADupstackStatsRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..1722f3e533e386eb454a9e81a5acaf70718cec59 --- /dev/null +++ b/evaluation/mteb/CQADupstackStatsRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3154.61, + "map_at_1": 0.20409, + "map_at_10": 0.26794, + "map_at_100": 0.27682, + "map_at_1000": 0.27783, + "map_at_3": 0.24461, + "map_at_5": 0.25668, + "mrr_at_1": 0.22853, + "mrr_at_10": 0.29296, + "mrr_at_100": 0.30103, + "mrr_at_1000": 0.30179, + "mrr_at_3": 0.27173, + "mrr_at_5": 0.28223, + "ndcg_at_1": 0.22853, + "ndcg_at_10": 0.31007, + "ndcg_at_100": 0.35581, + "ndcg_at_1000": 0.38147, + "ndcg_at_3": 0.26591, + "ndcg_at_5": 0.2843, + "precision_at_1": 0.22853, + "precision_at_10": 0.05031, + "precision_at_100": 0.00794, + "precision_at_1000": 0.0011, + "precision_at_3": 0.11401, + "precision_at_5": 0.0816, + "recall_at_1": 0.20409, + "recall_at_10": 0.41766, + "recall_at_100": 0.62964, + "recall_at_1000": 0.81682, + "recall_at_3": 0.29281, + "recall_at_5": 0.3383 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackTexRetrieval.json b/evaluation/mteb/CQADupstackTexRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..f80255a807a204975b41e8572c211cf5ff5f7ff9 --- /dev/null +++ b/evaluation/mteb/CQADupstackTexRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 5341.04, + "map_at_1": 0.14549, + "map_at_10": 0.20315, + "map_at_100": 0.21301, + "map_at_1000": 0.21425, + "map_at_3": 0.18132, + "map_at_5": 0.19429, + "mrr_at_1": 0.1786, + "mrr_at_10": 0.23861, + "mrr_at_100": 0.24737, + "mrr_at_1000": 0.2482, + "mrr_at_3": 0.21685, + "mrr_at_5": 0.23008, + "ndcg_at_1": 0.1786, + "ndcg_at_10": 0.24396, + "ndcg_at_100": 0.29328, + "ndcg_at_1000": 0.32486, + "ndcg_at_3": 0.20375, + "ndcg_at_5": 0.22411, + "precision_at_1": 0.1786, + "precision_at_10": 0.0447, + "precision_at_100": 0.0081, + "precision_at_1000": 0.00125, + "precision_at_3": 0.09475, + "precision_at_5": 0.07171, + "recall_at_1": 0.14549, + "recall_at_10": 0.33365, + "recall_at_100": 0.55797, + "recall_at_1000": 0.78632, + "recall_at_3": 0.22229, + "recall_at_5": 0.27339 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackUnixRetrieval.json b/evaluation/mteb/CQADupstackUnixRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..984e4ef4c277a527d52dbd8cd12bddfd17b4b417 --- /dev/null +++ b/evaluation/mteb/CQADupstackUnixRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3320.59, + "map_at_1": 0.23286, + "map_at_10": 0.30728, + "map_at_100": 0.3184, + "map_at_1000": 0.31953, + "map_at_3": 0.28302, + "map_at_5": 0.29615, + "mrr_at_1": 0.27239, + "mrr_at_10": 0.34408, + "mrr_at_100": 0.35335, + "mrr_at_1000": 0.35405, + "mrr_at_3": 0.32152, + "mrr_at_5": 0.33355, + "ndcg_at_1": 0.27239, + "ndcg_at_10": 0.35324, + "ndcg_at_100": 0.40866, + "ndcg_at_1000": 0.43584, + "ndcg_at_3": 0.30899, + "ndcg_at_5": 0.32813, + "precision_at_1": 0.27239, + "precision_at_10": 0.05896, + "precision_at_100": 0.00979, + "precision_at_1000": 0.00133, + "precision_at_3": 0.13713, + "precision_at_5": 0.09683, + "recall_at_1": 0.23286, + "recall_at_10": 0.45711, + "recall_at_100": 0.70611, + "recall_at_1000": 0.90029, + "recall_at_3": 0.33615, + "recall_at_5": 0.3841 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackWebmastersRetrieval.json b/evaluation/mteb/CQADupstackWebmastersRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..b9b6b0a53bf48dc0950516674d5b5b56a77f6e1c --- /dev/null +++ b/evaluation/mteb/CQADupstackWebmastersRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1047.0, + "map_at_1": 0.23962, + "map_at_10": 0.31943, + "map_at_100": 0.33384, + "map_at_1000": 0.33611, + "map_at_3": 0.29243, + "map_at_5": 0.30446, + "mrr_at_1": 0.28458, + "mrr_at_10": 0.36157, + "mrr_at_100": 0.37093, + "mrr_at_1000": 0.37163, + "mrr_at_3": 0.3386, + "mrr_at_5": 0.35086, + "ndcg_at_1": 0.28458, + "ndcg_at_10": 0.37201, + "ndcg_at_100": 0.42591, + "ndcg_at_1000": 0.45539, + "ndcg_at_3": 0.32889, + "ndcg_at_5": 0.34483, + "precision_at_1": 0.28458, + "precision_at_10": 0.07332, + "precision_at_100": 0.01437, + "precision_at_1000": 0.00233, + "precision_at_3": 0.15547, + "precision_at_5": 0.11146, + "recall_at_1": 0.23962, + "recall_at_10": 0.46751, + "recall_at_100": 0.71626, + "recall_at_1000": 0.90939, + "recall_at_3": 0.34138, + "recall_at_5": 0.38673 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackWordpressRetrieval.json b/evaluation/mteb/CQADupstackWordpressRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..38565f86dc448ee71edcde5cdcd81b8e0f3bf3a5 --- /dev/null +++ b/evaluation/mteb/CQADupstackWordpressRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3522.66, + "map_at_1": 0.18555, + "map_at_10": 0.24759, + "map_at_100": 0.25732, + "map_at_1000": 0.25847, + "map_at_3": 0.22646, + "map_at_5": 0.23792, + "mrr_at_1": 0.20148, + "mrr_at_10": 0.26696, + "mrr_at_100": 0.27605, + "mrr_at_1000": 0.27696, + "mrr_at_3": 0.24522, + "mrr_at_5": 0.25715, + "ndcg_at_1": 0.20148, + "ndcg_at_10": 0.28746, + "ndcg_at_100": 0.3357, + "ndcg_at_1000": 0.36584, + "ndcg_at_3": 0.24532, + "ndcg_at_5": 0.26484, + "precision_at_1": 0.20148, + "precision_at_10": 0.04529, + "precision_at_100": 0.00736, + "precision_at_1000": 0.00108, + "precision_at_3": 0.10351, + "precision_at_5": 0.0732, + "recall_at_1": 0.18555, + "recall_at_10": 0.39276, + "recall_at_100": 0.61511, + "recall_at_1000": 0.84111, + "recall_at_3": 0.27779, + "recall_at_5": 0.32591 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ClimateFEVER.json b/evaluation/mteb/ClimateFEVER.json new file mode 100644 index 0000000000000000000000000000000000000000..b3a0bcb7b49e7758c23db370715003717f5c578d --- /dev/null +++ b/evaluation/mteb/ClimateFEVER.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 30738.76, + "map_at_1": 0.10367, + "map_at_10": 0.18954, + "map_at_100": 0.20675, + "map_at_1000": 0.20868, + "map_at_3": 0.15486, + "map_at_5": 0.17347, + "mrr_at_1": 0.23257, + "mrr_at_10": 0.35419, + "mrr_at_100": 0.36361, + "mrr_at_1000": 0.36403, + "mrr_at_3": 0.31748, + "mrr_at_5": 0.34077, + "ndcg_at_1": 0.23257, + "ndcg_at_10": 0.2711, + "ndcg_at_100": 0.33981, + "ndcg_at_1000": 0.37444, + "ndcg_at_3": 0.21472, + "ndcg_at_5": 0.23769, + "precision_at_1": 0.23257, + "precision_at_10": 0.08704, + "precision_at_100": 0.01606, + "precision_at_1000": 0.00225, + "precision_at_3": 0.16287, + "precision_at_5": 0.13068, + "recall_at_1": 0.10367, + "recall_at_10": 0.33706, + "recall_at_100": 0.57375, + "recall_at_1000": 0.7679, + "recall_at_3": 0.2018, + "recall_at_5": 0.26215 + } +} \ No newline at end of file diff --git a/evaluation/mteb/DBPedia.json b/evaluation/mteb/DBPedia.json new file mode 100644 index 0000000000000000000000000000000000000000..f62611e7dbcf3eec4b818ee16b3b603d711ccda8 --- /dev/null +++ b/evaluation/mteb/DBPedia.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 21716.72, + "map_at_1": 0.08246, + "map_at_10": 0.15979, + "map_at_100": 0.21025, + "map_at_1000": 0.2219, + "map_at_3": 0.11997, + "map_at_5": 0.13697, + "mrr_at_1": 0.6075, + "mrr_at_10": 0.68701, + "mrr_at_100": 0.691, + "mrr_at_1000": 0.69111, + "mrr_at_3": 0.66583, + "mrr_at_5": 0.67871, + "ndcg_at_1": 0.4975, + "ndcg_at_10": 0.34702, + "ndcg_at_100": 0.37607, + "ndcg_at_1000": 0.44322, + "ndcg_at_3": 0.39555, + "ndcg_at_5": 0.36684, + "precision_at_1": 0.6075, + "precision_at_10": 0.26625, + "precision_at_100": 0.0797, + "precision_at_1000": 0.01678, + "precision_at_3": 0.41833, + "precision_at_5": 0.345, + "recall_at_1": 0.08246, + "recall_at_10": 0.20968, + "recall_at_100": 0.42065, + "recall_at_1000": 0.63671, + "recall_at_3": 0.13039, + "recall_at_5": 0.16042 + } +} \ No newline at end of file diff --git a/evaluation/mteb/EmotionClassification.json b/evaluation/mteb/EmotionClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..b0131392646e898448f4d52d64de16c549880441 --- /dev/null +++ b/evaluation/mteb/EmotionClassification.json @@ -0,0 +1,12 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.49215, + "accuracy_stderr": 0.0188680285138644, + "evaluation_time": 46.17, + "f1": 0.44859524511637555, + "f1_stderr": 0.012754862698341127, + "main_score": 0.49215 + } +} \ No newline at end of file diff --git a/evaluation/mteb/FEVER.json b/evaluation/mteb/FEVER.json new file mode 100644 index 0000000000000000000000000000000000000000..5762dffa1f996a8b6119adc9e63dcf844bb19f9d --- /dev/null +++ b/evaluation/mteb/FEVER.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 32873.8, + "map_at_1": 0.56769, + "map_at_10": 0.67302, + "map_at_100": 0.67692, + "map_at_1000": 0.67712, + "map_at_3": 0.65346, + "map_at_5": 0.66574, + "mrr_at_1": 0.61371, + "mrr_at_10": 0.71875, + "mrr_at_100": 0.72195, + "mrr_at_1000": 0.72206, + "mrr_at_3": 0.7004, + "mrr_at_5": 0.71224, + "ndcg_at_1": 0.61371, + "ndcg_at_10": 0.72731, + "ndcg_at_100": 0.74468, + "ndcg_at_1000": 0.74916, + "ndcg_at_3": 0.69077, + "ndcg_at_5": 0.71111, + "precision_at_1": 0.61371, + "precision_at_10": 0.09326, + "precision_at_100": 0.0103, + "precision_at_1000": 0.00108, + "precision_at_3": 0.27303, + "precision_at_5": 0.17525, + "recall_at_1": 0.56769, + "recall_at_10": 0.8506, + "recall_at_100": 0.92767, + "recall_at_1000": 0.95933, + "recall_at_3": 0.75131, + "recall_at_5": 0.8017 + } +} \ No newline at end of file diff --git a/evaluation/mteb/FiQA2018.json b/evaluation/mteb/FiQA2018.json new file mode 100644 index 0000000000000000000000000000000000000000..4d95ab0eb16e7e0c3487739acce12d04314508d7 --- /dev/null +++ b/evaluation/mteb/FiQA2018.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 506.29, + "map_at_1": 0.15753, + "map_at_10": 0.25876, + "map_at_100": 0.27415, + "map_at_1000": 0.27591, + "map_at_3": 0.2217, + "map_at_5": 0.24236, + "mrr_at_1": 0.31019, + "mrr_at_10": 0.39977, + "mrr_at_100": 0.40789, + "mrr_at_1000": 0.40832, + "mrr_at_3": 0.37088, + "mrr_at_5": 0.38655, + "ndcg_at_1": 0.31019, + "ndcg_at_10": 0.33286, + "ndcg_at_100": 0.39529, + "ndcg_at_1000": 0.42934, + "ndcg_at_3": 0.2929, + "ndcg_at_5": 0.30615, + "precision_at_1": 0.31019, + "precision_at_10": 0.09383, + "precision_at_100": 0.01602, + "precision_at_1000": 0.00222, + "precision_at_3": 0.19753, + "precision_at_5": 0.14815, + "recall_at_1": 0.15753, + "recall_at_10": 0.40896, + "recall_at_100": 0.64443, + "recall_at_1000": 0.85218, + "recall_at_3": 0.26526, + "recall_at_5": 0.32453 + } +} \ No newline at end of file diff --git a/evaluation/mteb/HotpotQA.json b/evaluation/mteb/HotpotQA.json new file mode 100644 index 0000000000000000000000000000000000000000..90333e81a54903d3d8903656301c1066bba038d4 --- /dev/null +++ b/evaluation/mteb/HotpotQA.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 24496.84, + "map_at_1": 0.32154, + "map_at_10": 0.43651, + "map_at_100": 0.4441, + "map_at_1000": 0.44487, + "map_at_3": 0.41239, + "map_at_5": 0.42659, + "mrr_at_1": 0.64308, + "mrr_at_10": 0.71225, + "mrr_at_100": 0.7157, + "mrr_at_1000": 0.71591, + "mrr_at_3": 0.6995, + "mrr_at_5": 0.70738, + "ndcg_at_1": 0.64308, + "ndcg_at_10": 0.52835, + "ndcg_at_100": 0.55841, + "ndcg_at_1000": 0.57484, + "ndcg_at_3": 0.49014, + "ndcg_at_5": 0.51016, + "precision_at_1": 0.64308, + "precision_at_10": 0.1077, + "precision_at_100": 0.01315, + "precision_at_1000": 0.00153, + "precision_at_3": 0.30223, + "precision_at_5": 0.19716, + "recall_at_1": 0.32154, + "recall_at_10": 0.53849, + "recall_at_100": 0.6576, + "recall_at_1000": 0.76705, + "recall_at_3": 0.45334, + "recall_at_5": 0.49291 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ImdbClassification.json b/evaluation/mteb/ImdbClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..634ace2554a0cec2dd56621b6b777916cfe1efcf --- /dev/null +++ b/evaluation/mteb/ImdbClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.635316, + "accuracy_stderr": 0.04211004041793359, + "ap": 0.5890084300359825, + "ap_stderr": 0.03111656030669087, + "evaluation_time": 1823.15, + "f1": 0.6335727889030892, + "f1_stderr": 0.04298181731027657, + "main_score": 0.635316 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MSMARCO.json b/evaluation/mteb/MSMARCO.json new file mode 100644 index 0000000000000000000000000000000000000000..66dc447ce7e6467ff679a11fa0f9f453201e1987 --- /dev/null +++ b/evaluation/mteb/MSMARCO.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "validation": { + "evaluation_time": 43129.71, + "map_at_1": 0.20566, + "map_at_10": 0.32229, + "map_at_100": 0.33445, + "map_at_1000": 0.33501, + "map_at_3": 0.28504, + "map_at_5": 0.30681, + "mrr_at_1": 0.21218, + "mrr_at_10": 0.32816, + "mrr_at_100": 0.33986, + "mrr_at_1000": 0.34035, + "mrr_at_3": 0.2915, + "mrr_at_5": 0.3129, + "ndcg_at_1": 0.21218, + "ndcg_at_10": 0.38832, + "ndcg_at_100": 0.44743, + "ndcg_at_1000": 0.46138, + "ndcg_at_3": 0.31232, + "ndcg_at_5": 0.351, + "precision_at_1": 0.21218, + "precision_at_10": 0.06186, + "precision_at_100": 0.00914, + "precision_at_1000": 0.00103, + "precision_at_3": 0.13314, + "precision_at_5": 0.09943, + "recall_at_1": 0.20566, + "recall_at_10": 0.59192, + "recall_at_100": 0.86626, + "recall_at_1000": 0.97283, + "recall_at_3": 0.38492, + "recall_at_5": 0.4776 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MTOPDomainClassification.json b/evaluation/mteb/MTOPDomainClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..148dd36e9505c0a26736a988db220407fac1e6d4 --- /dev/null +++ b/evaluation/mteb/MTOPDomainClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.9256269949840401, + "accuracy_stderr": 0.0038850782151341893, + "f1": 0.921020975473988, + "f1_stderr": 0.0044060167819252195, + "main_score": 0.9256269949840401 + }, + "evaluation_time": 45.61 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MTOPIntentClassification.json b/evaluation/mteb/MTOPIntentClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..cd7e7ae919ad1ae1e2060360b7ce089bd0c07751 --- /dev/null +++ b/evaluation/mteb/MTOPIntentClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.7184678522571819, + "accuracy_stderr": 0.01111163093063421, + "f1": 0.5365271934859202, + "f1_stderr": 0.010588821247395342, + "main_score": 0.7184678522571819 + }, + "evaluation_time": 122.63 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MassiveIntentClassification.json b/evaluation/mteb/MassiveIntentClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..1446c491f38fb672bd17ec07ff3751a17653e8a9 --- /dev/null +++ b/evaluation/mteb/MassiveIntentClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.6900806993947546, + "accuracy_stderr": 0.012602822825246073, + "f1": 0.6741429618885515, + "f1_stderr": 0.011707310656187842, + "main_score": 0.6900806993947546 + }, + "evaluation_time": 72.53 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MassiveScenarioClassification.json b/evaluation/mteb/MassiveScenarioClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..4b360505c9efb5433774e68a4b1a828aa731961e --- /dev/null +++ b/evaluation/mteb/MassiveScenarioClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.7590114324142568, + "accuracy_stderr": 0.010656499030857346, + "f1": 0.7625183590651454, + "f1_stderr": 0.010649247943952904, + "main_score": 0.7590114324142568 + }, + "evaluation_time": 41.51 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MedrxivClusteringP2P.json b/evaluation/mteb/MedrxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..b31c5ddacba4eddb5d6acf9f196024ec193f167a --- /dev/null +++ b/evaluation/mteb/MedrxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3336.04, + "v_measure": 0.31350109978273394, + "v_measure_std": 0.015425381126297112 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MedrxivClusteringS2S.json b/evaluation/mteb/MedrxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..489e76198c9f2e7278ac2dcf13875997a144c0b0 --- /dev/null +++ b/evaluation/mteb/MedrxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 492.85, + "v_measure": 0.2876892369576733, + "v_measure_std": 0.01586647571955603 + } +} \ No newline at end of file diff --git a/evaluation/mteb/NFCorpus.json b/evaluation/mteb/NFCorpus.json new file mode 100644 index 0000000000000000000000000000000000000000..9be2b7a66ff727828338949abac0aefb8d2c4693 --- /dev/null +++ b/evaluation/mteb/NFCorpus.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 168.77, + "map_at_1": 0.05604, + "map_at_10": 0.1238, + "map_at_100": 0.15791, + "map_at_1000": 0.17327, + "map_at_3": 0.0915, + "map_at_5": 0.10599, + "mrr_at_1": 0.45201, + "mrr_at_10": 0.53374, + "mrr_at_100": 0.54089, + "mrr_at_1000": 0.54123, + "mrr_at_3": 0.51445, + "mrr_at_5": 0.5259, + "ndcg_at_1": 0.42879, + "ndcg_at_10": 0.33891, + "ndcg_at_100": 0.31392, + "ndcg_at_1000": 0.4036, + "ndcg_at_3": 0.39076, + "ndcg_at_5": 0.37047, + "precision_at_1": 0.44582, + "precision_at_10": 0.25294, + "precision_at_100": 0.08285, + "precision_at_1000": 0.02148, + "precision_at_3": 0.3612, + "precision_at_5": 0.3195, + "recall_at_1": 0.05604, + "recall_at_10": 0.16239, + "recall_at_100": 0.3216, + "recall_at_1000": 0.64513, + "recall_at_3": 0.10406, + "recall_at_5": 0.12685 + } +} \ No newline at end of file diff --git a/evaluation/mteb/NQ.json b/evaluation/mteb/NQ.json new file mode 100644 index 0000000000000000000000000000000000000000..384d7be9e8958aaa9a125696bc5df2d180f52630 --- /dev/null +++ b/evaluation/mteb/NQ.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 15471.98, + "map_at_1": 0.25881, + "map_at_10": 0.39501, + "map_at_100": 0.40615, + "map_at_1000": 0.40661, + "map_at_3": 0.35559, + "map_at_5": 0.37773, + "mrr_at_1": 0.29229, + "mrr_at_10": 0.41956, + "mrr_at_100": 0.4286, + "mrr_at_1000": 0.42893, + "mrr_at_3": 0.38562, + "mrr_at_5": 0.40542, + "ndcg_at_1": 0.292, + "ndcg_at_10": 0.46703, + "ndcg_at_100": 0.51644, + "ndcg_at_1000": 0.52771, + "ndcg_at_3": 0.39142, + "ndcg_at_5": 0.42892, + "precision_at_1": 0.292, + "precision_at_10": 0.0792, + "precision_at_100": 0.01066, + "precision_at_1000": 0.00117, + "precision_at_3": 0.18105, + "precision_at_5": 0.13036, + "recall_at_1": 0.25881, + "recall_at_10": 0.66266, + "recall_at_100": 0.88116, + "recall_at_1000": 0.96582, + "recall_at_3": 0.46526, + "recall_at_5": 0.55154 + } +} \ No newline at end of file diff --git a/evaluation/mteb/QuoraRetrieval.json b/evaluation/mteb/QuoraRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..5fa2a6a35d1181c7abb6909467993f82609d1131 --- /dev/null +++ b/evaluation/mteb/QuoraRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1285.77, + "map_at_1": 0.67553, + "map_at_10": 0.8134, + "map_at_100": 0.82002, + "map_at_1000": 0.82027, + "map_at_3": 0.78281, + "map_at_5": 0.80149, + "mrr_at_1": 0.7772, + "mrr_at_10": 0.84733, + "mrr_at_100": 0.84878, + "mrr_at_1000": 0.84879, + "mrr_at_3": 0.83587, + "mrr_at_5": 0.84326, + "ndcg_at_1": 0.7775, + "ndcg_at_10": 0.85603, + "ndcg_at_100": 0.87069, + "ndcg_at_1000": 0.8725, + "ndcg_at_3": 0.82303, + "ndcg_at_5": 0.84037, + "precision_at_1": 0.7775, + "precision_at_10": 0.1304, + "precision_at_100": 0.01507, + "precision_at_1000": 0.00156, + "precision_at_3": 0.35903, + "precision_at_5": 0.23738, + "recall_at_1": 0.67553, + "recall_at_10": 0.93903, + "recall_at_100": 0.99062, + "recall_at_1000": 0.99935, + "recall_at_3": 0.84581, + "recall_at_5": 0.89316 + } +} \ No newline at end of file diff --git a/evaluation/mteb/RedditClustering.json b/evaluation/mteb/RedditClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb13a586113ab3c4ac16b31ee14150d9552d6e3 --- /dev/null +++ b/evaluation/mteb/RedditClustering.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 6540.34, + "v_measure": 0.46468877112302354, + "v_measure_std": 0.059743532887098036 + } +} \ No newline at end of file diff --git a/evaluation/mteb/RedditClusteringP2P.json b/evaluation/mteb/RedditClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..23e88da9eec98fa81b1948a8a1327524042192a0 --- /dev/null +++ b/evaluation/mteb/RedditClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 26567.79, + "v_measure": 0.5416687629824692, + "v_measure_std": 0.11955619105267504 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SCIDOCS.json b/evaluation/mteb/SCIDOCS.json new file mode 100644 index 0000000000000000000000000000000000000000..f67c30fd0fbe8976b69dbdbb3d0209b1e60983e8 --- /dev/null +++ b/evaluation/mteb/SCIDOCS.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 338.56, + "map_at_1": 0.04053, + "map_at_10": 0.09694, + "map_at_100": 0.11387, + "map_at_1000": 0.11654, + "map_at_3": 0.07053, + "map_at_5": 0.08439, + "mrr_at_1": 0.199, + "mrr_at_10": 0.29359, + "mrr_at_100": 0.30484, + "mrr_at_1000": 0.30553, + "mrr_at_3": 0.262, + "mrr_at_5": 0.28115, + "ndcg_at_1": 0.199, + "ndcg_at_10": 0.16575, + "ndcg_at_100": 0.23655, + "ndcg_at_1000": 0.28853, + "ndcg_at_3": 0.15848, + "ndcg_at_5": 0.14026, + "precision_at_1": 0.199, + "precision_at_10": 0.0845, + "precision_at_100": 0.01872, + "precision_at_1000": 0.00313, + "precision_at_3": 0.14667, + "precision_at_5": 0.1232, + "recall_at_1": 0.04053, + "recall_at_10": 0.1717, + "recall_at_100": 0.38025, + "recall_at_1000": 0.63572, + "recall_at_3": 0.08903, + "recall_at_5": 0.12477 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SGPT-2.7B-weightedmean-msmarco-specb-bitfit_results.csv b/evaluation/mteb/SGPT-2.7B-weightedmean-msmarco-specb-bitfit_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..a382156c476e5abdcfc2e9a25daf0014cebfaace --- /dev/null +++ b/evaluation/mteb/SGPT-2.7B-weightedmean-msmarco-specb-bitfit_results.csv @@ -0,0 +1,354 @@ +model,task,dataset,language,metric,value +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,BitextMining,BUCC,,f1, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,BitextMining,Tatoeba,,f1, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en,accuracy,0.6756716417910448 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en-ext,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en-ext,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonPolarityClassification,en,accuracy,0.71439575 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,en,accuracy,0.35748 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,zh,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,zh,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,Banking77Classification,en,accuracy,0.8321753246753246 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,EmotionClassification,en,accuracy,0.49215 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,ImdbClassification,en,accuracy,0.635316 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,af,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,af,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,am,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,am,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ar,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ar,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,az,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,az,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,bn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,bn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,cy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,cy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,da,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,da,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,el,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,el,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,en,accuracy,0.6900806993947546 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fa,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fa,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,he,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,he,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hu,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hu,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,id,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,id,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,is,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,is,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,it,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,it,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,jv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,jv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ka,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ka,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,km,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,km,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,kn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,kn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ko,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ko,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,lv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,lv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ml,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ml,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,mn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,mn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ms,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ms,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,my,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,my,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nb,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nb,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pt,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pt,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ro,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ro,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ru,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ru,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sq,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sq,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sw,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sw,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ta,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ta,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,te,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,te,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ur,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ur,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,vi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,vi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-CN,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-CN,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-TW,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-TW,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,af,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,af,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,am,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,am,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ar,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ar,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,az,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,az,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,bn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,bn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,cy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,cy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,da,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,da,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,el,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,el,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,en,accuracy,0.7590114324142568 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fa,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fa,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,he,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,he,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hu,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hu,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hy,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,id,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,id,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,is,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,is,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,it,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,it,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ja,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,jv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,jv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ka,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ka,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,km,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,km,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,kn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,kn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ko,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ko,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,lv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,lv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ml,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ml,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,mn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,mn,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ms,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ms,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,my,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,my,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nb,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nb,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pt,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pt,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ro,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ro,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ru,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ru,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sq,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sq,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sv,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sw,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sw,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ta,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ta,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,te,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,te,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tl,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ur,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ur,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,vi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,vi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-CN,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-CN,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-TW,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-TW,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,en,accuracy,0.9256269949840401 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,en,accuracy,0.7184678522571819 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,de,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,es,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,fr,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,hi,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,th,accuracy, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,ToxicConversationsClassification,en,accuracy,0.6884080000000001 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,TweetSentimentExtractionClassification,en,accuracy,0.5668647425014148 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,ArxivClusteringP2P,en,v_measure,0.44721257146422017 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,ArxivClusteringS2S,en,v_measure,0.35081451519142065 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,BiorxivClusteringP2P,en,v_measure,0.3441414219680629 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,BiorxivClusteringS2S,en,v_measure,0.30533275862270026 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,MedrxivClusteringP2P,en,v_measure,0.31350109978273394 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,MedrxivClusteringS2S,en,v_measure,0.2876892369576733 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,RedditClustering,en,v_measure,0.46468877112302354 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,RedditClusteringP2P,en,v_measure,0.5416687629824692 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,StackExchangeClustering,en,v_measure,0.5919409867397648 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,StackExchangeClusteringP2P,en,v_measure,0.32574403257811496 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,TwentyNewsgroupsClustering,en,v_measure,0.408911707239219 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,PairClassification,SprintDuplicateQuestions,en,ap,0.9347124923047998 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,PairClassification,TwitterSemEval2015,en,ap,0.6368133990830133 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,PairClassification,TwitterURLCorpus,en,ap,0.8480240716354543 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Reranking,AskUbuntuDupQuestions,en,map,0.5963466199039206 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Reranking,MindSmallReranking,,map, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Reranking,SciDocsRR,en,map,0.7771580844366375 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Reranking,StackOverflowDupQuestions,en,map,0.4961186384154483 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,ArguAna,en,ndcg_at_10,0.50491 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,ClimateFEVER,en,ndcg_at_10,0.2711 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,CQADupstackRetrieval,en,ndcg_at_10,0.3653358333333333 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,DBPedia,en,ndcg_at_10,0.34702 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,FEVER,en,ndcg_at_10,0.72731 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,FiQA2018,en,ndcg_at_10,0.33286 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,HotpotQA,en,ndcg_at_10,0.52835 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,MSMARCO,en,ndcg_at_10,0.38832 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,NFCorpus,en,ndcg_at_10,0.33891 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,NQ,en,ndcg_at_10,0.46703 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,QuoraRetrieval,en,ndcg_at_10,0.85603 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,SCIDOCS,en,ndcg_at_10,0.16575 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,SciFact,en,ndcg_at_10,0.70165 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,Touche2020,en,ndcg_at_10,0.2344 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,TRECCOVID,en,ndcg_at_10,0.7517 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,BIOSSES,en,cosine_spearman,0.8484289705838665 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,SICK-R,en,cosine_spearman,0.6819926431966059 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS12,en,cosine_spearman,0.6699390786191646 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS13,en,cosine_spearman,0.7757623085766706 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS14,en,cosine_spearman,0.7278258293483495 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS15,en,cosine_spearman,0.8261707296911949 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS16,en,cosine_spearman,0.8009839524406284 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,ko-ko,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,ko-ko,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,ar-ar,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,ar-ar,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-ar,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-ar,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-de,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-de,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-en,cosine_spearman,0.8725017540413703 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-tr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-tr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-es,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-es,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,fr-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,fr-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,it-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,it-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,nl-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS17,nl-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,en,cosine_spearman,0.687486910762485 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,es,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,es,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,tr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,tr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,ar,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,ar,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,ru,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,ru,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,it,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,it,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh-en,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-it,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-it,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-fr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-fr,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-pl,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-pl,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr-pl,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr-pl,cosine_spearman, +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,STSBenchmark,en,cosine_spearman,0.7920585637461047 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Summarization,SummEval,en,cosine_spearman,0.27870478281195465 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Classification,average,en,accuracy,0.6713040365015015 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Clustering,average,en,v_measure,0.39833144224085476 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,PairClassification,average,en,ap,0.8065166543410891 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,Retrieval,average,en,ndcg_at_10,0.4653783888888888 +SGPT-2.7B-weightedmean-msmarco-specb-bitfit,STS,average,en,cosine_spearman,0.768315074100644 diff --git a/evaluation/mteb/SICK-R.json b/evaluation/mteb/SICK-R.json new file mode 100644 index 0000000000000000000000000000000000000000..22c44c4e7dbd661a68d76fce431bbbb1db67abb0 --- /dev/null +++ b/evaluation/mteb/SICK-R.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7775487485196769, + "spearman": 0.6819926431966059 + }, + "euclidean": { + "pearson": 0.7169016204991725, + "spearman": 0.6698099673026834 + }, + "evaluation_time": 106.65, + "manhattan": { + "pearson": 0.7162994072488664, + "spearman": 0.6703435950744577 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS12.json b/evaluation/mteb/STS12.json new file mode 100644 index 0000000000000000000000000000000000000000..fe71c63984ba75056dcbcc59404ffe23b5f2e488 --- /dev/null +++ b/evaluation/mteb/STS12.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7591051402657887, + "spearman": 0.6699390786191646 + }, + "euclidean": { + "pearson": 0.7154128036454578, + "spearman": 0.6925605675649068 + }, + "evaluation_time": 49.4, + "manhattan": { + "pearson": 0.7160981030780171, + "spearman": 0.6927513670128046 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS13.json b/evaluation/mteb/STS13.json new file mode 100644 index 0000000000000000000000000000000000000000..ec236dcf2bd05cae7f401a96fbb79628d383c966 --- /dev/null +++ b/evaluation/mteb/STS13.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7723835466417793, + "spearman": 0.7757623085766706 + }, + "euclidean": { + "pearson": 0.775090992200725, + "spearman": 0.7788601688144924 + }, + "evaluation_time": 21.2, + "manhattan": { + "pearson": 0.7739045060647423, + "spearman": 0.7777552718279098 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS14.json b/evaluation/mteb/STS14.json new file mode 100644 index 0000000000000000000000000000000000000000..d6fd5e58185c108ff0c610dc50bb05d7bc217801 --- /dev/null +++ b/evaluation/mteb/STS14.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7791692485139602, + "spearman": 0.7278258293483495 + }, + "euclidean": { + "pearson": 0.7464773017077789, + "spearman": 0.7181662299104619 + }, + "evaluation_time": 52.55, + "manhattan": { + "pearson": 0.7471043337995533, + "spearman": 0.7183960860845646 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS15.json b/evaluation/mteb/STS15.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4b462464b004d9bcf826802a0ca67d2b991b50 --- /dev/null +++ b/evaluation/mteb/STS15.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.8213422113617578, + "spearman": 0.8261707296911949 + }, + "euclidean": { + "pearson": 0.8142487480400861, + "spearman": 0.8217970991273835 + }, + "evaluation_time": 44.15, + "manhattan": { + "pearson": 0.8141985055477845, + "spearman": 0.8215823204362936 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS16.json b/evaluation/mteb/STS16.json new file mode 100644 index 0000000000000000000000000000000000000000..026443fe644674906d9d34eb00e520a96fba1ee6 --- /dev/null +++ b/evaluation/mteb/STS16.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7907989542843826, + "spearman": 0.8009839524406284 + }, + "euclidean": { + "pearson": 0.7643186028364195, + "spearman": 0.7676720323266472 + }, + "evaluation_time": 19.91, + "manhattan": { + "pearson": 0.7646747474091611, + "spearman": 0.7681797407068668 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS17.json b/evaluation/mteb/STS17.json new file mode 100644 index 0000000000000000000000000000000000000000..e1e55ce5ecda6761931e04a190ad5cc43fd59bf3 --- /dev/null +++ b/evaluation/mteb/STS17.json @@ -0,0 +1,21 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en-en": { + "cos_sim": { + "pearson": 0.870420983224933, + "spearman": 0.8725017540413703 + }, + "euclidean": { + "pearson": 0.8456384596473421, + "spearman": 0.8472557417564885 + }, + "manhattan": { + "pearson": 0.847329954474549, + "spearman": 0.8475071371008909 + } + }, + "evaluation_time": 5.01 + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS22.json b/evaluation/mteb/STS22.json new file mode 100644 index 0000000000000000000000000000000000000000..307e4c7774aa9b7d3d2ad8272a0ddfce1e0b08b8 --- /dev/null +++ b/evaluation/mteb/STS22.json @@ -0,0 +1,21 @@ +{ + "dataset_version": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_version": "0.0.2", + "test": { + "en": { + "cos_sim": { + "pearson": 0.6847031320016423, + "spearman": 0.687486910762485 + }, + "euclidean": { + "pearson": 0.7130330985913915, + "spearman": 0.7159666258520735 + }, + "manhattan": { + "pearson": 0.7144238842790269, + "spearman": 0.7167460706861044 + } + }, + "evaluation_time": 35.98 + } +} \ No newline at end of file diff --git a/evaluation/mteb/STSBenchmark.json b/evaluation/mteb/STSBenchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..ab9fe685d0bbdc654075991b32c4d0eb152769ef --- /dev/null +++ b/evaluation/mteb/STSBenchmark.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.8079514366062676, + "spearman": 0.7920585637461047 + }, + "euclidean": { + "pearson": 0.786591557395699, + "spearman": 0.7786455794285717 + }, + "evaluation_time": 20.42, + "manhattan": { + "pearson": 0.7867754806486864, + "spearman": 0.7788178687200732 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/SciDocsRR.json b/evaluation/mteb/SciDocsRR.json new file mode 100644 index 0000000000000000000000000000000000000000..153e1f48eb10ab946f3d0a04f4e69421d3b5f74a --- /dev/null +++ b/evaluation/mteb/SciDocsRR.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1031.79, + "map": 0.7771580844366375, + "mrr": 0.9304215845882513 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SciFact.json b/evaluation/mteb/SciFact.json new file mode 100644 index 0000000000000000000000000000000000000000..e0c921200456bbade7ce1d184463931db8db6069 --- /dev/null +++ b/evaluation/mteb/SciFact.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 149.15, + "map_at_1": 0.564, + "map_at_10": 0.65701, + "map_at_100": 0.6632, + "map_at_1000": 0.66341, + "map_at_3": 0.62642, + "map_at_5": 0.64342, + "mrr_at_1": 0.58667, + "mrr_at_10": 0.66453, + "mrr_at_100": 0.66967, + "mrr_at_1000": 0.66988, + "mrr_at_3": 0.64111, + "mrr_at_5": 0.65411, + "ndcg_at_1": 0.58667, + "ndcg_at_10": 0.70165, + "ndcg_at_100": 0.72938, + "ndcg_at_1000": 0.73456, + "ndcg_at_3": 0.6479, + "ndcg_at_5": 0.6728, + "precision_at_1": 0.58667, + "precision_at_10": 0.094, + "precision_at_100": 0.01087, + "precision_at_1000": 0.00113, + "precision_at_3": 0.24889, + "precision_at_5": 0.16667, + "recall_at_1": 0.564, + "recall_at_10": 0.83122, + "recall_at_100": 0.95667, + "recall_at_1000": 0.99667, + "recall_at_3": 0.68378, + "recall_at_5": 0.74683 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SprintDuplicateQuestions.json b/evaluation/mteb/SprintDuplicateQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..9f2d9d3aa60dd49e170a0e4ad122dcddcdda85a7 --- /dev/null +++ b/evaluation/mteb/SprintDuplicateQuestions.json @@ -0,0 +1,48 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "accuracy": 0.9976831683168317, + "accuracy_threshold": 0.7241246104240417, + "ap": 0.9347124923047998, + "f1": 0.8806122448979592, + "f1_threshold": 0.7241246104240417, + "precision": 0.8989583333333333, + "recall": 0.863 + }, + "dot": { + "accuracy": 0.9957326732673267, + "accuracy_threshold": 2443.231201171875, + "ap": 0.8406577868167208, + "f1": 0.7782629791363417, + "f1_threshold": 2222.54931640625, + "precision": 0.7558906691800189, + "recall": 0.802 + }, + "euclidean": { + "accuracy": 0.9974257425742574, + "accuracy_threshold": 41.9460334777832, + "ap": 0.921904681653555, + "f1": 0.8674821610601428, + "f1_threshold": 41.9460334777832, + "precision": 0.8846153846153846, + "recall": 0.851 + }, + "evaluation_time": 69.95, + "manhattan": { + "accuracy": 0.9974554455445545, + "accuracy_threshold": 1650.22705078125, + "ap": 0.9243377908099479, + "f1": 0.8686765457332654, + "f1_threshold": 1650.22705078125, + "precision": 0.8881922675026124, + "recall": 0.85 + }, + "max": { + "accuracy": 0.9976831683168317, + "ap": 0.9347124923047998, + "f1": 0.8806122448979592 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/StackExchangeClustering.json b/evaluation/mteb/StackExchangeClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..3f0ad91b1603f500a2b4fc3aa785590a10ffecab --- /dev/null +++ b/evaluation/mteb/StackExchangeClustering.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 4474.91, + "v_measure": 0.5919409867397648, + "v_measure_std": 0.044990353083397404 + } +} \ No newline at end of file diff --git a/evaluation/mteb/StackExchangeClusteringP2P.json b/evaluation/mteb/StackExchangeClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..52f100ca26a61d6c4a5ec461f0fb78dd78e40b47 --- /dev/null +++ b/evaluation/mteb/StackExchangeClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 5563.17, + "v_measure": 0.32574403257811496, + "v_measure_std": 0.014883592444388137 + } +} \ No newline at end of file diff --git a/evaluation/mteb/StackOverflowDupQuestions.json b/evaluation/mteb/StackOverflowDupQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..ef6ff0b15f98b4878a8f242c72900cfe8cff2d25 --- /dev/null +++ b/evaluation/mteb/StackOverflowDupQuestions.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 564.48, + "map": 0.4961186384154483, + "mrr": 0.5055424253034547 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SummEval.json b/evaluation/mteb/SummEval.json new file mode 100644 index 0000000000000000000000000000000000000000..fa70498db76760ad7a4fc98d07b7db7fd2230360 --- /dev/null +++ b/evaluation/mteb/SummEval.json @@ -0,0 +1,15 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.26047224542079067, + "spearman": 0.27870478281195465 + }, + "dot": { + "pearson": 0.2518242068570122, + "spearman": 0.25116243491984985 + }, + "evaluation_time": 819.63 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TRECCOVID.json b/evaluation/mteb/TRECCOVID.json new file mode 100644 index 0000000000000000000000000000000000000000..9816bc7eeeae1690e391f699508d93440d8563fc --- /dev/null +++ b/evaluation/mteb/TRECCOVID.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1514.42, + "map_at_1": 0.00223, + "map_at_10": 0.01762, + "map_at_100": 0.09984, + "map_at_1000": 0.24265, + "map_at_3": 0.00631, + "map_at_5": 0.00995, + "mrr_at_1": 0.88, + "mrr_at_10": 0.92833, + "mrr_at_100": 0.92833, + "mrr_at_1000": 0.92833, + "mrr_at_3": 0.92333, + "mrr_at_5": 0.92833, + "ndcg_at_1": 0.83, + "ndcg_at_10": 0.7517, + "ndcg_at_100": 0.55432, + "ndcg_at_1000": 0.49482, + "ndcg_at_3": 0.82184, + "ndcg_at_5": 0.79712, + "precision_at_1": 0.88, + "precision_at_10": 0.786, + "precision_at_100": 0.5656, + "precision_at_1000": 0.22334, + "precision_at_3": 0.86667, + "precision_at_5": 0.836, + "recall_at_1": 0.00223, + "recall_at_10": 0.01988, + "recall_at_100": 0.13301, + "recall_at_1000": 0.46587, + "recall_at_3": 0.00663, + "recall_at_5": 0.01079 + } +} \ No newline at end of file diff --git a/evaluation/mteb/Touche2020.json b/evaluation/mteb/Touche2020.json new file mode 100644 index 0000000000000000000000000000000000000000..d00082700dd8329bd0fbcfbdb23fa44a0017db33 --- /dev/null +++ b/evaluation/mteb/Touche2020.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2889.28, + "map_at_1": 0.03047, + "map_at_10": 0.08792, + "map_at_100": 0.14631, + "map_at_1000": 0.16127, + "map_at_3": 0.04673, + "map_at_5": 0.05897, + "mrr_at_1": 0.38776, + "mrr_at_10": 0.49271, + "mrr_at_100": 0.50181, + "mrr_at_1000": 0.502, + "mrr_at_3": 0.44558, + "mrr_at_5": 0.47925, + "ndcg_at_1": 0.35714, + "ndcg_at_10": 0.2344, + "ndcg_at_100": 0.35345, + "ndcg_at_1000": 0.46495, + "ndcg_at_3": 0.26146, + "ndcg_at_5": 0.24878, + "precision_at_1": 0.38776, + "precision_at_10": 0.20816, + "precision_at_100": 0.07429, + "precision_at_1000": 0.01494, + "precision_at_3": 0.2585, + "precision_at_5": 0.24082, + "recall_at_1": 0.03047, + "recall_at_10": 0.14975, + "recall_at_100": 0.45943, + "recall_at_1000": 0.80311, + "recall_at_3": 0.05478, + "recall_at_5": 0.08294 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ToxicConversationsClassification.json b/evaluation/mteb/ToxicConversationsClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..49590aa8ddee93386174c9308188b01b3f4bfeec --- /dev/null +++ b/evaluation/mteb/ToxicConversationsClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.6884080000000001, + "accuracy_stderr": 0.03756498923199632, + "ap": 0.13135219251019847, + "ap_stderr": 0.008882217511035871, + "evaluation_time": 1395.88, + "f1": 0.5284999942199551, + "f1_stderr": 0.0229079257888878, + "main_score": 0.6884080000000001 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TweetSentimentExtractionClassification.json b/evaluation/mteb/TweetSentimentExtractionClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..582ff6c1641c9e7918cbe41269d1f1d1f5925527 --- /dev/null +++ b/evaluation/mteb/TweetSentimentExtractionClassification.json @@ -0,0 +1,12 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.5668647425014148, + "accuracy_stderr": 0.018016725536934956, + "evaluation_time": 60.39, + "f1": 0.5697981427365949, + "f1_stderr": 0.01874017580799281, + "main_score": 0.5668647425014148 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TwentyNewsgroupsClustering.json b/evaluation/mteb/TwentyNewsgroupsClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..c608a5e77447151807c242b8bf04799ac38da86d --- /dev/null +++ b/evaluation/mteb/TwentyNewsgroupsClustering.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 336.58, + "v_measure": 0.408911707239219, + "v_measure_std": 0.018781046979839584 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TwitterSemEval2015.json b/evaluation/mteb/TwitterSemEval2015.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa6685e35a0d5a6506234cf93dc39c2eb0733c5 --- /dev/null +++ b/evaluation/mteb/TwitterSemEval2015.json @@ -0,0 +1,48 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "accuracy": 0.8304226023722954, + "accuracy_threshold": 0.7418354153633118, + "ap": 0.6368133990830133, + "f1": 0.6034918447048012, + "f1_threshold": 0.643256425857544, + "precision": 0.5343775427176566, + "recall": 0.6931398416886544 + }, + "dot": { + "accuracy": 0.8146271681468678, + "accuracy_threshold": 2533.845947265625, + "ap": 0.5778072296265885, + "f1": 0.5628769265132901, + "f1_threshold": 2075.621826171875, + "precision": 0.487993803253292, + "recall": 0.6649076517150396 + }, + "euclidean": { + "accuracy": 0.8216606067830959, + "accuracy_threshold": 40.57570266723633, + "ap": 0.5997453037120352, + "f1": 0.568560235063663, + "f1_threshold": 46.821205139160156, + "precision": 0.5303791685701233, + "recall": 0.612664907651715 + }, + "evaluation_time": 110.55, + "manhattan": { + "accuracy": 0.8216606067830959, + "accuracy_threshold": 1594.75048828125, + "ap": 0.5998962379571767, + "f1": 0.5698153158451947, + "f1_threshold": 1863.289306640625, + "precision": 0.5141158989598811, + "recall": 0.6390501319261214 + }, + "max": { + "accuracy": 0.8304226023722954, + "ap": 0.6368133990830133, + "f1": 0.6034918447048012 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/TwitterURLCorpus.json b/evaluation/mteb/TwitterURLCorpus.json new file mode 100644 index 0000000000000000000000000000000000000000..227728b9392cf55e35fa9d4727c6167c3457e59d --- /dev/null +++ b/evaluation/mteb/TwitterURLCorpus.json @@ -0,0 +1,48 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "accuracy": 0.8856871191834517, + "accuracy_threshold": 0.6621477603912354, + "ap": 0.8480240716354543, + "f1": 0.7707765285922384, + "f1_threshold": 0.634358286857605, + "precision": 0.7484947406601379, + "recall": 0.7944256236526024 + }, + "dot": { + "accuracy": 0.8600923662048356, + "accuracy_threshold": 2335.04736328125, + "ap": 0.7865564590120729, + "f1": 0.7275837491090521, + "f1_threshold": 2066.458251953125, + "precision": 0.6772823779193206, + "recall": 0.7859562673236834 + }, + "euclidean": { + "accuracy": 0.8784103698529127, + "accuracy_threshold": 46.53624725341797, + "ap": 0.8350424424952835, + "f1": 0.7574496544549306, + "f1_threshold": 49.904998779296875, + "precision": 0.7319402556369381, + "recall": 0.7848013550970127 + }, + "evaluation_time": 437.33, + "manhattan": { + "accuracy": 0.879225365777933, + "accuracy_threshold": 1849.88671875, + "ap": 0.8349479248597825, + "f1": 0.7567748162447101, + "f1_threshold": 1964.5179443359375, + "precision": 0.7306810035842294, + "recall": 0.7848013550970127 + }, + "max": { + "accuracy": 0.8856871191834517, + "ap": 0.8480240716354543, + "f1": 0.7707765285922384 + } + } +} \ No newline at end of file