|
--- |
|
library_name: sentence-transformers |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- feature-extraction |
|
- sentence-similarity |
|
- mteb |
|
model-index: |
|
- name: epoch_0_model |
|
results: |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_counterfactual |
|
name: MTEB AmazonCounterfactualClassification (en) |
|
config: en |
|
split: test |
|
revision: e8379541af4e31359cca9fbcf4b00f2671dba205 |
|
metrics: |
|
- type: accuracy |
|
value: 78.67164179104476 |
|
- type: ap |
|
value: 42.7379383648841 |
|
- type: f1 |
|
value: 72.79997373883408 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_polarity |
|
name: MTEB AmazonPolarityClassification |
|
config: default |
|
split: test |
|
revision: e2d317d38cd51312af73b3d32a06d1a08b442046 |
|
metrics: |
|
- type: accuracy |
|
value: 90.413775 |
|
- type: ap |
|
value: 87.08812293673202 |
|
- type: f1 |
|
value: 90.39246586225426 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (en) |
|
config: en |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 47.80799999999999 |
|
- type: f1 |
|
value: 47.25679462673503 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: arguana |
|
name: MTEB ArguAna |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 30.37 |
|
- type: map_at_10 |
|
value: 45.748 |
|
- type: map_at_100 |
|
value: 46.617 |
|
- type: map_at_1000 |
|
value: 46.622 |
|
- type: map_at_3 |
|
value: 40.564 |
|
- type: map_at_5 |
|
value: 43.69 |
|
- type: mrr_at_1 |
|
value: 30.868000000000002 |
|
- type: mrr_at_10 |
|
value: 45.905 |
|
- type: mrr_at_100 |
|
value: 46.787 |
|
- type: mrr_at_1000 |
|
value: 46.792 |
|
- type: mrr_at_3 |
|
value: 40.717999999999996 |
|
- type: mrr_at_5 |
|
value: 43.851 |
|
- type: ndcg_at_1 |
|
value: 30.37 |
|
- type: ndcg_at_10 |
|
value: 54.662 |
|
- type: ndcg_at_100 |
|
value: 58.23700000000001 |
|
- type: ndcg_at_1000 |
|
value: 58.373 |
|
- type: ndcg_at_3 |
|
value: 44.069 |
|
- type: ndcg_at_5 |
|
value: 49.728 |
|
- type: precision_at_1 |
|
value: 30.37 |
|
- type: precision_at_10 |
|
value: 8.321000000000002 |
|
- type: precision_at_100 |
|
value: 0.985 |
|
- type: precision_at_1000 |
|
value: 0.1 |
|
- type: precision_at_3 |
|
value: 18.089 |
|
- type: precision_at_5 |
|
value: 13.613 |
|
- type: recall_at_1 |
|
value: 30.37 |
|
- type: recall_at_10 |
|
value: 83.21499999999999 |
|
- type: recall_at_100 |
|
value: 98.506 |
|
- type: recall_at_1000 |
|
value: 99.57300000000001 |
|
- type: recall_at_3 |
|
value: 54.266999999999996 |
|
- type: recall_at_5 |
|
value: 68.065 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/arxiv-clustering-p2p |
|
name: MTEB ArxivClusteringP2P |
|
config: default |
|
split: test |
|
revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d |
|
metrics: |
|
- type: v_measure |
|
value: 45.85329429748079 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/arxiv-clustering-s2s |
|
name: MTEB ArxivClusteringS2S |
|
config: default |
|
split: test |
|
revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53 |
|
metrics: |
|
- type: v_measure |
|
value: 36.12666783330692 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: mteb/askubuntudupquestions-reranking |
|
name: MTEB AskUbuntuDupQuestions |
|
config: default |
|
split: test |
|
revision: 2000358ca161889fa9c082cb41daa8dcfb161a54 |
|
metrics: |
|
- type: map |
|
value: 57.58783867794241 |
|
- type: mrr |
|
value: 71.84078617596622 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/biosses-sts |
|
name: MTEB BIOSSES |
|
config: default |
|
split: test |
|
revision: d3fb88f8f02e40887cd149695127462bbcf29b4a |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 87.92453139507079 |
|
- type: cos_sim_spearman |
|
value: 85.37122234964886 |
|
- type: euclidean_pearson |
|
value: 86.19345621799168 |
|
- type: euclidean_spearman |
|
value: 85.37122234964886 |
|
- type: manhattan_pearson |
|
value: 86.4685290616604 |
|
- type: manhattan_spearman |
|
value: 85.91400580167537 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/banking77 |
|
name: MTEB Banking77Classification |
|
config: default |
|
split: test |
|
revision: 0fd18e25b25c072e09e0d92ab615fda904d66300 |
|
metrics: |
|
- type: accuracy |
|
value: 83.81818181818181 |
|
- type: f1 |
|
value: 83.76155217378863 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/biorxiv-clustering-p2p |
|
name: MTEB BiorxivClusteringP2P |
|
config: default |
|
split: test |
|
revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40 |
|
metrics: |
|
- type: v_measure |
|
value: 38.46362764203256 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/biorxiv-clustering-s2s |
|
name: MTEB BiorxivClusteringS2S |
|
config: default |
|
split: test |
|
revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908 |
|
metrics: |
|
- type: v_measure |
|
value: 33.13807021168658 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackAndroidRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 29.725 |
|
- type: map_at_10 |
|
value: 39.654 |
|
- type: map_at_100 |
|
value: 41.022 |
|
- type: map_at_1000 |
|
value: 41.144999999999996 |
|
- type: map_at_3 |
|
value: 36.819 |
|
- type: map_at_5 |
|
value: 38.376 |
|
- type: mrr_at_1 |
|
value: 36.195 |
|
- type: mrr_at_10 |
|
value: 45.171 |
|
- type: mrr_at_100 |
|
value: 45.987 |
|
- type: mrr_at_1000 |
|
value: 46.033 |
|
- type: mrr_at_3 |
|
value: 43.038 |
|
- type: mrr_at_5 |
|
value: 44.196000000000005 |
|
- type: ndcg_at_1 |
|
value: 36.195 |
|
- type: ndcg_at_10 |
|
value: 45.194 |
|
- type: ndcg_at_100 |
|
value: 50.516000000000005 |
|
- type: ndcg_at_1000 |
|
value: 52.739000000000004 |
|
- type: ndcg_at_3 |
|
value: 41.142 |
|
- type: ndcg_at_5 |
|
value: 42.973 |
|
- type: precision_at_1 |
|
value: 36.195 |
|
- type: precision_at_10 |
|
value: 8.312 |
|
- type: precision_at_100 |
|
value: 1.346 |
|
- type: precision_at_1000 |
|
value: 0.182 |
|
- type: precision_at_3 |
|
value: 19.599 |
|
- type: precision_at_5 |
|
value: 13.847999999999999 |
|
- type: recall_at_1 |
|
value: 29.725 |
|
- type: recall_at_10 |
|
value: 55.51199999999999 |
|
- type: recall_at_100 |
|
value: 78.182 |
|
- type: recall_at_1000 |
|
value: 92.727 |
|
- type: recall_at_3 |
|
value: 43.287 |
|
- type: recall_at_5 |
|
value: 48.732 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackEnglishRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 30.23 |
|
- type: map_at_10 |
|
value: 40.091 |
|
- type: map_at_100 |
|
value: 41.251 |
|
- type: map_at_1000 |
|
value: 41.384 |
|
- type: map_at_3 |
|
value: 37.247 |
|
- type: map_at_5 |
|
value: 38.865 |
|
- type: mrr_at_1 |
|
value: 38.279999999999994 |
|
- type: mrr_at_10 |
|
value: 46.288000000000004 |
|
- type: mrr_at_100 |
|
value: 47.022999999999996 |
|
- type: mrr_at_1000 |
|
value: 47.068 |
|
- type: mrr_at_3 |
|
value: 44.395 |
|
- type: mrr_at_5 |
|
value: 45.446 |
|
- type: ndcg_at_1 |
|
value: 38.279999999999994 |
|
- type: ndcg_at_10 |
|
value: 45.647 |
|
- type: ndcg_at_100 |
|
value: 49.851 |
|
- type: ndcg_at_1000 |
|
value: 51.991 |
|
- type: ndcg_at_3 |
|
value: 41.795 |
|
- type: ndcg_at_5 |
|
value: 43.578 |
|
- type: precision_at_1 |
|
value: 38.279999999999994 |
|
- type: precision_at_10 |
|
value: 8.522 |
|
- type: precision_at_100 |
|
value: 1.361 |
|
- type: precision_at_1000 |
|
value: 0.185 |
|
- type: precision_at_3 |
|
value: 20.297 |
|
- type: precision_at_5 |
|
value: 14.255 |
|
- type: recall_at_1 |
|
value: 30.23 |
|
- type: recall_at_10 |
|
value: 55.094 |
|
- type: recall_at_100 |
|
value: 72.887 |
|
- type: recall_at_1000 |
|
value: 86.295 |
|
- type: recall_at_3 |
|
value: 43.244 |
|
- type: recall_at_5 |
|
value: 48.507 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackGamingRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 40.854 |
|
- type: map_at_10 |
|
value: 52.232 |
|
- type: map_at_100 |
|
value: 53.129000000000005 |
|
- type: map_at_1000 |
|
value: 53.185 |
|
- type: map_at_3 |
|
value: 49.094 |
|
- type: map_at_5 |
|
value: 50.834999999999994 |
|
- type: mrr_at_1 |
|
value: 46.708 |
|
- type: mrr_at_10 |
|
value: 56.021 |
|
- type: mrr_at_100 |
|
value: 56.584 |
|
- type: mrr_at_1000 |
|
value: 56.611999999999995 |
|
- type: mrr_at_3 |
|
value: 53.657 |
|
- type: mrr_at_5 |
|
value: 55.027 |
|
- type: ndcg_at_1 |
|
value: 46.708 |
|
- type: ndcg_at_10 |
|
value: 57.89 |
|
- type: ndcg_at_100 |
|
value: 61.541999999999994 |
|
- type: ndcg_at_1000 |
|
value: 62.754 |
|
- type: ndcg_at_3 |
|
value: 52.632 |
|
- type: ndcg_at_5 |
|
value: 55.104 |
|
- type: precision_at_1 |
|
value: 46.708 |
|
- type: precision_at_10 |
|
value: 9.122 |
|
- type: precision_at_100 |
|
value: 1.187 |
|
- type: precision_at_1000 |
|
value: 0.134 |
|
- type: precision_at_3 |
|
value: 23.072 |
|
- type: precision_at_5 |
|
value: 15.661 |
|
- type: recall_at_1 |
|
value: 40.854 |
|
- type: recall_at_10 |
|
value: 70.98 |
|
- type: recall_at_100 |
|
value: 86.947 |
|
- type: recall_at_1000 |
|
value: 95.62 |
|
- type: recall_at_3 |
|
value: 56.782999999999994 |
|
- type: recall_at_5 |
|
value: 62.980000000000004 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackGisRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 26.366 |
|
- type: map_at_10 |
|
value: 33.674 |
|
- type: map_at_100 |
|
value: 34.58 |
|
- type: map_at_1000 |
|
value: 34.662 |
|
- type: map_at_3 |
|
value: 31.596999999999998 |
|
- type: map_at_5 |
|
value: 32.596000000000004 |
|
- type: mrr_at_1 |
|
value: 28.588 |
|
- type: mrr_at_10 |
|
value: 35.912 |
|
- type: mrr_at_100 |
|
value: 36.696 |
|
- type: mrr_at_1000 |
|
value: 36.760999999999996 |
|
- type: mrr_at_3 |
|
value: 33.823 |
|
- type: mrr_at_5 |
|
value: 34.829 |
|
- type: ndcg_at_1 |
|
value: 28.588 |
|
- type: ndcg_at_10 |
|
value: 38.031 |
|
- type: ndcg_at_100 |
|
value: 42.678 |
|
- type: ndcg_at_1000 |
|
value: 44.871 |
|
- type: ndcg_at_3 |
|
value: 33.815 |
|
- type: ndcg_at_5 |
|
value: 35.531 |
|
- type: precision_at_1 |
|
value: 28.588 |
|
- type: precision_at_10 |
|
value: 5.638 |
|
- type: precision_at_100 |
|
value: 0.8380000000000001 |
|
- type: precision_at_1000 |
|
value: 0.106 |
|
- type: precision_at_3 |
|
value: 13.974 |
|
- type: precision_at_5 |
|
value: 9.401 |
|
- type: recall_at_1 |
|
value: 26.366 |
|
- type: recall_at_10 |
|
value: 49.353 |
|
- type: recall_at_100 |
|
value: 71.194 |
|
- type: recall_at_1000 |
|
value: 87.842 |
|
- type: recall_at_3 |
|
value: 37.829 |
|
- type: recall_at_5 |
|
value: 41.976 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackMathematicaRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 16.634 |
|
- type: map_at_10 |
|
value: 23.271 |
|
- type: map_at_100 |
|
value: 24.366 |
|
- type: map_at_1000 |
|
value: 24.484 |
|
- type: map_at_3 |
|
value: 21.075 |
|
- type: map_at_5 |
|
value: 22.364 |
|
- type: mrr_at_1 |
|
value: 20.522000000000002 |
|
- type: mrr_at_10 |
|
value: 27.735 |
|
- type: mrr_at_100 |
|
value: 28.691 |
|
- type: mrr_at_1000 |
|
value: 28.762999999999998 |
|
- type: mrr_at_3 |
|
value: 25.518 |
|
- type: mrr_at_5 |
|
value: 26.762000000000004 |
|
- type: ndcg_at_1 |
|
value: 20.522000000000002 |
|
- type: ndcg_at_10 |
|
value: 27.791 |
|
- type: ndcg_at_100 |
|
value: 33.101 |
|
- type: ndcg_at_1000 |
|
value: 36.075 |
|
- type: ndcg_at_3 |
|
value: 23.74 |
|
- type: ndcg_at_5 |
|
value: 25.691000000000003 |
|
- type: precision_at_1 |
|
value: 20.522000000000002 |
|
- type: precision_at_10 |
|
value: 4.963 |
|
- type: precision_at_100 |
|
value: 0.873 |
|
- type: precision_at_1000 |
|
value: 0.128 |
|
- type: precision_at_3 |
|
value: 11.111 |
|
- type: precision_at_5 |
|
value: 8.01 |
|
- type: recall_at_1 |
|
value: 16.634 |
|
- type: recall_at_10 |
|
value: 37.498 |
|
- type: recall_at_100 |
|
value: 60.598 |
|
- type: recall_at_1000 |
|
value: 81.828 |
|
- type: recall_at_3 |
|
value: 26.136 |
|
- type: recall_at_5 |
|
value: 31.211 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackPhysicsRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 28.200999999999997 |
|
- type: map_at_10 |
|
value: 37.619 |
|
- type: map_at_100 |
|
value: 38.834999999999994 |
|
- type: map_at_1000 |
|
value: 38.951 |
|
- type: map_at_3 |
|
value: 35.119 |
|
- type: map_at_5 |
|
value: 36.559999999999995 |
|
- type: mrr_at_1 |
|
value: 33.782000000000004 |
|
- type: mrr_at_10 |
|
value: 43.033 |
|
- type: mrr_at_100 |
|
value: 43.761 |
|
- type: mrr_at_1000 |
|
value: 43.818 |
|
- type: mrr_at_3 |
|
value: 40.727999999999994 |
|
- type: mrr_at_5 |
|
value: 42.129 |
|
- type: ndcg_at_1 |
|
value: 33.782000000000004 |
|
- type: ndcg_at_10 |
|
value: 43.178 |
|
- type: ndcg_at_100 |
|
value: 48.27 |
|
- type: ndcg_at_1000 |
|
value: 50.559 |
|
- type: ndcg_at_3 |
|
value: 38.974 |
|
- type: ndcg_at_5 |
|
value: 41.019 |
|
- type: precision_at_1 |
|
value: 33.782000000000004 |
|
- type: precision_at_10 |
|
value: 7.575 |
|
- type: precision_at_100 |
|
value: 1.1820000000000002 |
|
- type: precision_at_1000 |
|
value: 0.154 |
|
- type: precision_at_3 |
|
value: 18.223 |
|
- type: precision_at_5 |
|
value: 12.742999999999999 |
|
- type: recall_at_1 |
|
value: 28.200999999999997 |
|
- type: recall_at_10 |
|
value: 54.089 |
|
- type: recall_at_100 |
|
value: 75.57000000000001 |
|
- type: recall_at_1000 |
|
value: 90.827 |
|
- type: recall_at_3 |
|
value: 42.435 |
|
- type: recall_at_5 |
|
value: 47.652 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackProgrammersRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 25.313000000000002 |
|
- type: map_at_10 |
|
value: 34.329 |
|
- type: map_at_100 |
|
value: 35.445 |
|
- type: map_at_1000 |
|
value: 35.556 |
|
- type: map_at_3 |
|
value: 31.659 |
|
- type: map_at_5 |
|
value: 32.981 |
|
- type: mrr_at_1 |
|
value: 30.822 |
|
- type: mrr_at_10 |
|
value: 39.084 |
|
- type: mrr_at_100 |
|
value: 39.97 |
|
- type: mrr_at_1000 |
|
value: 40.025 |
|
- type: mrr_at_3 |
|
value: 36.815 |
|
- type: mrr_at_5 |
|
value: 38.002 |
|
- type: ndcg_at_1 |
|
value: 30.822 |
|
- type: ndcg_at_10 |
|
value: 39.512 |
|
- type: ndcg_at_100 |
|
value: 44.925 |
|
- type: ndcg_at_1000 |
|
value: 47.274 |
|
- type: ndcg_at_3 |
|
value: 35.055 |
|
- type: ndcg_at_5 |
|
value: 36.788 |
|
- type: precision_at_1 |
|
value: 30.822 |
|
- type: precision_at_10 |
|
value: 7.1 |
|
- type: precision_at_100 |
|
value: 1.15 |
|
- type: precision_at_1000 |
|
value: 0.151 |
|
- type: precision_at_3 |
|
value: 16.476 |
|
- type: precision_at_5 |
|
value: 11.461 |
|
- type: recall_at_1 |
|
value: 25.313000000000002 |
|
- type: recall_at_10 |
|
value: 50.178 |
|
- type: recall_at_100 |
|
value: 74.312 |
|
- type: recall_at_1000 |
|
value: 90.50200000000001 |
|
- type: recall_at_3 |
|
value: 37.626 |
|
- type: recall_at_5 |
|
value: 42.34 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 25.502250000000004 |
|
- type: map_at_10 |
|
value: 33.655166666666666 |
|
- type: map_at_100 |
|
value: 34.72833333333333 |
|
- type: map_at_1000 |
|
value: 34.84375 |
|
- type: map_at_3 |
|
value: 31.253999999999998 |
|
- type: map_at_5 |
|
value: 32.55075 |
|
- type: mrr_at_1 |
|
value: 29.91975 |
|
- type: mrr_at_10 |
|
value: 37.65441666666667 |
|
- type: mrr_at_100 |
|
value: 38.464416666666665 |
|
- type: mrr_at_1000 |
|
value: 38.52591666666667 |
|
- type: mrr_at_3 |
|
value: 35.57858333333333 |
|
- type: mrr_at_5 |
|
value: 36.71083333333333 |
|
- type: ndcg_at_1 |
|
value: 29.91975 |
|
- type: ndcg_at_10 |
|
value: 38.47316666666667 |
|
- type: ndcg_at_100 |
|
value: 43.256416666666674 |
|
- type: ndcg_at_1000 |
|
value: 45.70658333333333 |
|
- type: ndcg_at_3 |
|
value: 34.350833333333334 |
|
- type: ndcg_at_5 |
|
value: 36.184583333333336 |
|
- type: precision_at_1 |
|
value: 29.91975 |
|
- type: precision_at_10 |
|
value: 6.5489999999999995 |
|
- type: precision_at_100 |
|
value: 1.0553333333333332 |
|
- type: precision_at_1000 |
|
value: 0.14516666666666667 |
|
- type: precision_at_3 |
|
value: 15.579083333333333 |
|
- type: precision_at_5 |
|
value: 10.851083333333332 |
|
- type: recall_at_1 |
|
value: 25.502250000000004 |
|
- type: recall_at_10 |
|
value: 48.7965 |
|
- type: recall_at_100 |
|
value: 69.93500000000002 |
|
- type: recall_at_1000 |
|
value: 87.17049999999999 |
|
- type: recall_at_3 |
|
value: 37.20433333333333 |
|
- type: recall_at_5 |
|
value: 42.00783333333333 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackStatsRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 23.777 |
|
- type: map_at_10 |
|
value: 29.932 |
|
- type: map_at_100 |
|
value: 30.778 |
|
- type: map_at_1000 |
|
value: 30.879 |
|
- type: map_at_3 |
|
value: 27.898 |
|
- type: map_at_5 |
|
value: 29.086000000000002 |
|
- type: mrr_at_1 |
|
value: 26.227 |
|
- type: mrr_at_10 |
|
value: 32.443 |
|
- type: mrr_at_100 |
|
value: 33.212 |
|
- type: mrr_at_1000 |
|
value: 33.29 |
|
- type: mrr_at_3 |
|
value: 30.419 |
|
- type: mrr_at_5 |
|
value: 31.616 |
|
- type: ndcg_at_1 |
|
value: 26.227 |
|
- type: ndcg_at_10 |
|
value: 33.774 |
|
- type: ndcg_at_100 |
|
value: 37.917 |
|
- type: ndcg_at_1000 |
|
value: 40.557 |
|
- type: ndcg_at_3 |
|
value: 29.875 |
|
- type: ndcg_at_5 |
|
value: 31.845000000000002 |
|
- type: precision_at_1 |
|
value: 26.227 |
|
- type: precision_at_10 |
|
value: 5.153 |
|
- type: precision_at_100 |
|
value: 0.784 |
|
- type: precision_at_1000 |
|
value: 0.108 |
|
- type: precision_at_3 |
|
value: 12.423 |
|
- type: precision_at_5 |
|
value: 8.773 |
|
- type: recall_at_1 |
|
value: 23.777 |
|
- type: recall_at_10 |
|
value: 43.142 |
|
- type: recall_at_100 |
|
value: 61.68900000000001 |
|
- type: recall_at_1000 |
|
value: 81.37100000000001 |
|
- type: recall_at_3 |
|
value: 32.582 |
|
- type: recall_at_5 |
|
value: 37.403 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackTexRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 16.659 |
|
- type: map_at_10 |
|
value: 22.926 |
|
- type: map_at_100 |
|
value: 23.837 |
|
- type: map_at_1000 |
|
value: 23.953 |
|
- type: map_at_3 |
|
value: 21.029999999999998 |
|
- type: map_at_5 |
|
value: 22.019 |
|
- type: mrr_at_1 |
|
value: 19.649 |
|
- type: mrr_at_10 |
|
value: 26.32 |
|
- type: mrr_at_100 |
|
value: 27.143 |
|
- type: mrr_at_1000 |
|
value: 27.222 |
|
- type: mrr_at_3 |
|
value: 24.484 |
|
- type: mrr_at_5 |
|
value: 25.468000000000004 |
|
- type: ndcg_at_1 |
|
value: 19.649 |
|
- type: ndcg_at_10 |
|
value: 26.941 |
|
- type: ndcg_at_100 |
|
value: 31.522 |
|
- type: ndcg_at_1000 |
|
value: 34.538999999999994 |
|
- type: ndcg_at_3 |
|
value: 23.419999999999998 |
|
- type: ndcg_at_5 |
|
value: 24.927 |
|
- type: precision_at_1 |
|
value: 19.649 |
|
- type: precision_at_10 |
|
value: 4.7010000000000005 |
|
- type: precision_at_100 |
|
value: 0.8130000000000001 |
|
- type: precision_at_1000 |
|
value: 0.124 |
|
- type: precision_at_3 |
|
value: 10.735999999999999 |
|
- type: precision_at_5 |
|
value: 7.591 |
|
- type: recall_at_1 |
|
value: 16.659 |
|
- type: recall_at_10 |
|
value: 35.721000000000004 |
|
- type: recall_at_100 |
|
value: 56.43 |
|
- type: recall_at_1000 |
|
value: 78.464 |
|
- type: recall_at_3 |
|
value: 25.878 |
|
- type: recall_at_5 |
|
value: 29.731999999999996 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackUnixRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 24.309 |
|
- type: map_at_10 |
|
value: 31.990000000000002 |
|
- type: map_at_100 |
|
value: 32.895 |
|
- type: map_at_1000 |
|
value: 33.0 |
|
- type: map_at_3 |
|
value: 29.848999999999997 |
|
- type: map_at_5 |
|
value: 30.942999999999998 |
|
- type: mrr_at_1 |
|
value: 28.638 |
|
- type: mrr_at_10 |
|
value: 36.036 |
|
- type: mrr_at_100 |
|
value: 36.787 |
|
- type: mrr_at_1000 |
|
value: 36.855 |
|
- type: mrr_at_3 |
|
value: 34.08 |
|
- type: mrr_at_5 |
|
value: 35.073 |
|
- type: ndcg_at_1 |
|
value: 28.638 |
|
- type: ndcg_at_10 |
|
value: 36.588 |
|
- type: ndcg_at_100 |
|
value: 41.152 |
|
- type: ndcg_at_1000 |
|
value: 43.769999999999996 |
|
- type: ndcg_at_3 |
|
value: 32.632 |
|
- type: ndcg_at_5 |
|
value: 34.249 |
|
- type: precision_at_1 |
|
value: 28.638 |
|
- type: precision_at_10 |
|
value: 5.942 |
|
- type: precision_at_100 |
|
value: 0.9249999999999999 |
|
- type: precision_at_1000 |
|
value: 0.127 |
|
- type: precision_at_3 |
|
value: 14.582999999999998 |
|
- type: precision_at_5 |
|
value: 9.944 |
|
- type: recall_at_1 |
|
value: 24.309 |
|
- type: recall_at_10 |
|
value: 46.725 |
|
- type: recall_at_100 |
|
value: 67.11 |
|
- type: recall_at_1000 |
|
value: 85.91499999999999 |
|
- type: recall_at_3 |
|
value: 35.72 |
|
- type: recall_at_5 |
|
value: 39.854 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackWebmastersRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 22.997999999999998 |
|
- type: map_at_10 |
|
value: 30.564000000000004 |
|
- type: map_at_100 |
|
value: 32.06 |
|
- type: map_at_1000 |
|
value: 32.282 |
|
- type: map_at_3 |
|
value: 28.12 |
|
- type: map_at_5 |
|
value: 29.395 |
|
- type: mrr_at_1 |
|
value: 27.075 |
|
- type: mrr_at_10 |
|
value: 34.510999999999996 |
|
- type: mrr_at_100 |
|
value: 35.549 |
|
- type: mrr_at_1000 |
|
value: 35.616 |
|
- type: mrr_at_3 |
|
value: 32.444 |
|
- type: mrr_at_5 |
|
value: 33.589999999999996 |
|
- type: ndcg_at_1 |
|
value: 27.075 |
|
- type: ndcg_at_10 |
|
value: 35.582 |
|
- type: ndcg_at_100 |
|
value: 41.308 |
|
- type: ndcg_at_1000 |
|
value: 44.385999999999996 |
|
- type: ndcg_at_3 |
|
value: 31.467 |
|
- type: ndcg_at_5 |
|
value: 33.189 |
|
- type: precision_at_1 |
|
value: 27.075 |
|
- type: precision_at_10 |
|
value: 6.68 |
|
- type: precision_at_100 |
|
value: 1.427 |
|
- type: precision_at_1000 |
|
value: 0.231 |
|
- type: precision_at_3 |
|
value: 14.625 |
|
- type: precision_at_5 |
|
value: 10.356 |
|
- type: recall_at_1 |
|
value: 22.997999999999998 |
|
- type: recall_at_10 |
|
value: 45.196 |
|
- type: recall_at_100 |
|
value: 70.319 |
|
- type: recall_at_1000 |
|
value: 90.766 |
|
- type: recall_at_3 |
|
value: 33.487 |
|
- type: recall_at_5 |
|
value: 38.297 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: BeIR/cqadupstack |
|
name: MTEB CQADupstackWordpressRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 20.961 |
|
- type: map_at_10 |
|
value: 27.58 |
|
- type: map_at_100 |
|
value: 28.542 |
|
- type: map_at_1000 |
|
value: 28.644 |
|
- type: map_at_3 |
|
value: 25.541000000000004 |
|
- type: map_at_5 |
|
value: 26.589000000000002 |
|
- type: mrr_at_1 |
|
value: 22.551 |
|
- type: mrr_at_10 |
|
value: 29.298999999999996 |
|
- type: mrr_at_100 |
|
value: 30.17 |
|
- type: mrr_at_1000 |
|
value: 30.248 |
|
- type: mrr_at_3 |
|
value: 27.542 |
|
- type: mrr_at_5 |
|
value: 28.392 |
|
- type: ndcg_at_1 |
|
value: 22.551 |
|
- type: ndcg_at_10 |
|
value: 31.55 |
|
- type: ndcg_at_100 |
|
value: 36.295 |
|
- type: ndcg_at_1000 |
|
value: 38.964 |
|
- type: ndcg_at_3 |
|
value: 27.663 |
|
- type: ndcg_at_5 |
|
value: 29.321 |
|
- type: precision_at_1 |
|
value: 22.551 |
|
- type: precision_at_10 |
|
value: 4.88 |
|
- type: precision_at_100 |
|
value: 0.7779999999999999 |
|
- type: precision_at_1000 |
|
value: 0.11199999999999999 |
|
- type: precision_at_3 |
|
value: 11.83 |
|
- type: precision_at_5 |
|
value: 8.17 |
|
- type: recall_at_1 |
|
value: 20.961 |
|
- type: recall_at_10 |
|
value: 42.07 |
|
- type: recall_at_100 |
|
value: 63.982000000000006 |
|
- type: recall_at_1000 |
|
value: 83.889 |
|
- type: recall_at_3 |
|
value: 31.445 |
|
- type: recall_at_5 |
|
value: 35.410000000000004 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: climate-fever |
|
name: MTEB ClimateFEVER |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 11.314 |
|
- type: map_at_10 |
|
value: 18.983 |
|
- type: map_at_100 |
|
value: 20.851 |
|
- type: map_at_1000 |
|
value: 21.066 |
|
- type: map_at_3 |
|
value: 16.014 |
|
- type: map_at_5 |
|
value: 17.569000000000003 |
|
- type: mrr_at_1 |
|
value: 25.277 |
|
- type: mrr_at_10 |
|
value: 36.657000000000004 |
|
- type: mrr_at_100 |
|
value: 37.646 |
|
- type: mrr_at_1000 |
|
value: 37.686 |
|
- type: mrr_at_3 |
|
value: 33.17 |
|
- type: mrr_at_5 |
|
value: 35.232 |
|
- type: ndcg_at_1 |
|
value: 25.277 |
|
- type: ndcg_at_10 |
|
value: 27.011000000000003 |
|
- type: ndcg_at_100 |
|
value: 34.418 |
|
- type: ndcg_at_1000 |
|
value: 38.089 |
|
- type: ndcg_at_3 |
|
value: 22.026 |
|
- type: ndcg_at_5 |
|
value: 23.866 |
|
- type: precision_at_1 |
|
value: 25.277 |
|
- type: precision_at_10 |
|
value: 8.397 |
|
- type: precision_at_100 |
|
value: 1.6320000000000001 |
|
- type: precision_at_1000 |
|
value: 0.22999999999999998 |
|
- type: precision_at_3 |
|
value: 16.156000000000002 |
|
- type: precision_at_5 |
|
value: 12.612000000000002 |
|
- type: recall_at_1 |
|
value: 11.314 |
|
- type: recall_at_10 |
|
value: 32.474 |
|
- type: recall_at_100 |
|
value: 57.926 |
|
- type: recall_at_1000 |
|
value: 78.387 |
|
- type: recall_at_3 |
|
value: 20.415 |
|
- type: recall_at_5 |
|
value: 25.407999999999998 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: dbpedia-entity |
|
name: MTEB DBPedia |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 8.835999999999999 |
|
- type: map_at_10 |
|
value: 19.73 |
|
- type: map_at_100 |
|
value: 28.011000000000003 |
|
- type: map_at_1000 |
|
value: 29.519000000000002 |
|
- type: map_at_3 |
|
value: 14.249 |
|
- type: map_at_5 |
|
value: 16.472 |
|
- type: mrr_at_1 |
|
value: 67.0 |
|
- type: mrr_at_10 |
|
value: 74.632 |
|
- type: mrr_at_100 |
|
value: 74.97200000000001 |
|
- type: mrr_at_1000 |
|
value: 74.97500000000001 |
|
- type: mrr_at_3 |
|
value: 72.958 |
|
- type: mrr_at_5 |
|
value: 73.908 |
|
- type: ndcg_at_1 |
|
value: 55.875 |
|
- type: ndcg_at_10 |
|
value: 42.071999999999996 |
|
- type: ndcg_at_100 |
|
value: 46.091 |
|
- type: ndcg_at_1000 |
|
value: 52.737 |
|
- type: ndcg_at_3 |
|
value: 47.079 |
|
- type: ndcg_at_5 |
|
value: 43.788 |
|
- type: precision_at_1 |
|
value: 67.0 |
|
- type: precision_at_10 |
|
value: 33.45 |
|
- type: precision_at_100 |
|
value: 10.633 |
|
- type: precision_at_1000 |
|
value: 2.067 |
|
- type: precision_at_3 |
|
value: 49.583 |
|
- type: precision_at_5 |
|
value: 41.25 |
|
- type: recall_at_1 |
|
value: 8.835999999999999 |
|
- type: recall_at_10 |
|
value: 24.872 |
|
- type: recall_at_100 |
|
value: 51.427 |
|
- type: recall_at_1000 |
|
value: 72.17099999999999 |
|
- type: recall_at_3 |
|
value: 15.631999999999998 |
|
- type: recall_at_5 |
|
value: 18.956 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/emotion |
|
name: MTEB EmotionClassification |
|
config: default |
|
split: test |
|
revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37 |
|
metrics: |
|
- type: accuracy |
|
value: 48.80500000000001 |
|
- type: f1 |
|
value: 43.91955883597831 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: fever |
|
name: MTEB FEVER |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 61.480999999999995 |
|
- type: map_at_10 |
|
value: 72.162 |
|
- type: map_at_100 |
|
value: 72.487 |
|
- type: map_at_1000 |
|
value: 72.504 |
|
- type: map_at_3 |
|
value: 70.354 |
|
- type: map_at_5 |
|
value: 71.509 |
|
- type: mrr_at_1 |
|
value: 66.262 |
|
- type: mrr_at_10 |
|
value: 76.605 |
|
- type: mrr_at_100 |
|
value: 76.833 |
|
- type: mrr_at_1000 |
|
value: 76.839 |
|
- type: mrr_at_3 |
|
value: 74.977 |
|
- type: mrr_at_5 |
|
value: 76.06 |
|
- type: ndcg_at_1 |
|
value: 66.262 |
|
- type: ndcg_at_10 |
|
value: 77.323 |
|
- type: ndcg_at_100 |
|
value: 78.685 |
|
- type: ndcg_at_1000 |
|
value: 79.032 |
|
- type: ndcg_at_3 |
|
value: 74.015 |
|
- type: ndcg_at_5 |
|
value: 75.916 |
|
- type: precision_at_1 |
|
value: 66.262 |
|
- type: precision_at_10 |
|
value: 9.757 |
|
- type: precision_at_100 |
|
value: 1.059 |
|
- type: precision_at_1000 |
|
value: 0.11100000000000002 |
|
- type: precision_at_3 |
|
value: 29.032999999999998 |
|
- type: precision_at_5 |
|
value: 18.5 |
|
- type: recall_at_1 |
|
value: 61.480999999999995 |
|
- type: recall_at_10 |
|
value: 88.878 |
|
- type: recall_at_100 |
|
value: 94.719 |
|
- type: recall_at_1000 |
|
value: 97.066 |
|
- type: recall_at_3 |
|
value: 79.95100000000001 |
|
- type: recall_at_5 |
|
value: 84.691 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: fiqa |
|
name: MTEB FiQA2018 |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 19.925 |
|
- type: map_at_10 |
|
value: 31.621 |
|
- type: map_at_100 |
|
value: 33.282000000000004 |
|
- type: map_at_1000 |
|
value: 33.455 |
|
- type: map_at_3 |
|
value: 27.504 |
|
- type: map_at_5 |
|
value: 29.921999999999997 |
|
- type: mrr_at_1 |
|
value: 39.660000000000004 |
|
- type: mrr_at_10 |
|
value: 47.366 |
|
- type: mrr_at_100 |
|
value: 48.179 |
|
- type: mrr_at_1000 |
|
value: 48.219 |
|
- type: mrr_at_3 |
|
value: 45.062000000000005 |
|
- type: mrr_at_5 |
|
value: 46.404 |
|
- type: ndcg_at_1 |
|
value: 39.660000000000004 |
|
- type: ndcg_at_10 |
|
value: 39.019 |
|
- type: ndcg_at_100 |
|
value: 45.286 |
|
- type: ndcg_at_1000 |
|
value: 48.370000000000005 |
|
- type: ndcg_at_3 |
|
value: 35.421 |
|
- type: ndcg_at_5 |
|
value: 36.767 |
|
- type: precision_at_1 |
|
value: 39.660000000000004 |
|
- type: precision_at_10 |
|
value: 10.494 |
|
- type: precision_at_100 |
|
value: 1.7069999999999999 |
|
- type: precision_at_1000 |
|
value: 0.22599999999999998 |
|
- type: precision_at_3 |
|
value: 23.200000000000003 |
|
- type: precision_at_5 |
|
value: 17.253 |
|
- type: recall_at_1 |
|
value: 19.925 |
|
- type: recall_at_10 |
|
value: 45.48 |
|
- type: recall_at_100 |
|
value: 68.585 |
|
- type: recall_at_1000 |
|
value: 87.128 |
|
- type: recall_at_3 |
|
value: 31.913000000000004 |
|
- type: recall_at_5 |
|
value: 38.107 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: hotpotqa |
|
name: MTEB HotpotQA |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 37.961 |
|
- type: map_at_10 |
|
value: 55.010000000000005 |
|
- type: map_at_100 |
|
value: 55.896 |
|
- type: map_at_1000 |
|
value: 55.962 |
|
- type: map_at_3 |
|
value: 52.03 |
|
- type: map_at_5 |
|
value: 53.866 |
|
- type: mrr_at_1 |
|
value: 75.922 |
|
- type: mrr_at_10 |
|
value: 81.655 |
|
- type: mrr_at_100 |
|
value: 81.879 |
|
- type: mrr_at_1000 |
|
value: 81.889 |
|
- type: mrr_at_3 |
|
value: 80.657 |
|
- type: mrr_at_5 |
|
value: 81.291 |
|
- type: ndcg_at_1 |
|
value: 75.922 |
|
- type: ndcg_at_10 |
|
value: 64.119 |
|
- type: ndcg_at_100 |
|
value: 67.25 |
|
- type: ndcg_at_1000 |
|
value: 68.55499999999999 |
|
- type: ndcg_at_3 |
|
value: 59.792 |
|
- type: ndcg_at_5 |
|
value: 62.165000000000006 |
|
- type: precision_at_1 |
|
value: 75.922 |
|
- type: precision_at_10 |
|
value: 13.155 |
|
- type: precision_at_100 |
|
value: 1.5599999999999998 |
|
- type: precision_at_1000 |
|
value: 0.173 |
|
- type: precision_at_3 |
|
value: 37.461 |
|
- type: precision_at_5 |
|
value: 24.351 |
|
- type: recall_at_1 |
|
value: 37.961 |
|
- type: recall_at_10 |
|
value: 65.77300000000001 |
|
- type: recall_at_100 |
|
value: 78.015 |
|
- type: recall_at_1000 |
|
value: 86.685 |
|
- type: recall_at_3 |
|
value: 56.192 |
|
- type: recall_at_5 |
|
value: 60.878 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/imdb |
|
name: MTEB ImdbClassification |
|
config: default |
|
split: test |
|
revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7 |
|
metrics: |
|
- type: accuracy |
|
value: 83.7804 |
|
- type: ap |
|
value: 78.89508987851809 |
|
- type: f1 |
|
value: 83.72392373438922 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: msmarco |
|
name: MTEB MSMARCO |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 23.807000000000002 |
|
- type: map_at_10 |
|
value: 36.411 |
|
- type: map_at_100 |
|
value: 37.574000000000005 |
|
- type: map_at_1000 |
|
value: 37.618 |
|
- type: map_at_3 |
|
value: 32.653 |
|
- type: map_at_5 |
|
value: 34.902 |
|
- type: mrr_at_1 |
|
value: 24.499000000000002 |
|
- type: mrr_at_10 |
|
value: 37.045 |
|
- type: mrr_at_100 |
|
value: 38.135999999999996 |
|
- type: mrr_at_1000 |
|
value: 38.175 |
|
- type: mrr_at_3 |
|
value: 33.326 |
|
- type: mrr_at_5 |
|
value: 35.561 |
|
- type: ndcg_at_1 |
|
value: 24.512999999999998 |
|
- type: ndcg_at_10 |
|
value: 43.328 |
|
- type: ndcg_at_100 |
|
value: 48.779 |
|
- type: ndcg_at_1000 |
|
value: 49.897999999999996 |
|
- type: ndcg_at_3 |
|
value: 35.713 |
|
- type: ndcg_at_5 |
|
value: 39.729 |
|
- type: precision_at_1 |
|
value: 24.512999999999998 |
|
- type: precision_at_10 |
|
value: 6.7379999999999995 |
|
- type: precision_at_100 |
|
value: 0.9450000000000001 |
|
- type: precision_at_1000 |
|
value: 0.104 |
|
- type: precision_at_3 |
|
value: 15.196000000000002 |
|
- type: precision_at_5 |
|
value: 11.158 |
|
- type: recall_at_1 |
|
value: 23.807000000000002 |
|
- type: recall_at_10 |
|
value: 64.488 |
|
- type: recall_at_100 |
|
value: 89.386 |
|
- type: recall_at_1000 |
|
value: 97.968 |
|
- type: recall_at_3 |
|
value: 43.891000000000005 |
|
- type: recall_at_5 |
|
value: 53.535 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_domain |
|
name: MTEB MTOPDomainClassification (en) |
|
config: en |
|
split: test |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
metrics: |
|
- type: accuracy |
|
value: 93.47013223894209 |
|
- type: f1 |
|
value: 93.15020887152107 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_intent |
|
name: MTEB MTOPIntentClassification (en) |
|
config: en |
|
split: test |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
metrics: |
|
- type: accuracy |
|
value: 75.27131782945737 |
|
- type: f1 |
|
value: 58.45703758149779 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (en) |
|
config: en |
|
split: test |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
metrics: |
|
- type: accuracy |
|
value: 72.76395427034298 |
|
- type: f1 |
|
value: 70.6084399610629 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (en) |
|
config: en |
|
split: test |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
metrics: |
|
- type: accuracy |
|
value: 76.69804976462676 |
|
- type: f1 |
|
value: 76.61599181962723 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/medrxiv-clustering-p2p |
|
name: MTEB MedrxivClusteringP2P |
|
config: default |
|
split: test |
|
revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73 |
|
metrics: |
|
- type: v_measure |
|
value: 32.7253797676744 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/medrxiv-clustering-s2s |
|
name: MTEB MedrxivClusteringS2S |
|
config: default |
|
split: test |
|
revision: 35191c8c0dca72d8ff3efcd72aa802307d469663 |
|
metrics: |
|
- type: v_measure |
|
value: 30.547731924629424 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: mteb/mind_small |
|
name: MTEB MindSmallReranking |
|
config: default |
|
split: test |
|
revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69 |
|
metrics: |
|
- type: map |
|
value: 31.286918745183772 |
|
- type: mrr |
|
value: 32.47449315230336 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: nfcorpus |
|
name: MTEB NFCorpus |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 5.894 |
|
- type: map_at_10 |
|
value: 13.405000000000001 |
|
- type: map_at_100 |
|
value: 16.586000000000002 |
|
- type: map_at_1000 |
|
value: 17.919 |
|
- type: map_at_3 |
|
value: 10.066 |
|
- type: map_at_5 |
|
value: 11.679 |
|
- type: mrr_at_1 |
|
value: 45.201 |
|
- type: mrr_at_10 |
|
value: 54.018 |
|
- type: mrr_at_100 |
|
value: 54.581999999999994 |
|
- type: mrr_at_1000 |
|
value: 54.623 |
|
- type: mrr_at_3 |
|
value: 51.6 |
|
- type: mrr_at_5 |
|
value: 53.473000000000006 |
|
- type: ndcg_at_1 |
|
value: 43.189 |
|
- type: ndcg_at_10 |
|
value: 35.306 |
|
- type: ndcg_at_100 |
|
value: 31.505 |
|
- type: ndcg_at_1000 |
|
value: 39.991 |
|
- type: ndcg_at_3 |
|
value: 41.108 |
|
- type: ndcg_at_5 |
|
value: 39.039 |
|
- type: precision_at_1 |
|
value: 44.582 |
|
- type: precision_at_10 |
|
value: 26.161 |
|
- type: precision_at_100 |
|
value: 7.867 |
|
- type: precision_at_1000 |
|
value: 2.043 |
|
- type: precision_at_3 |
|
value: 39.112 |
|
- type: precision_at_5 |
|
value: 34.18 |
|
- type: recall_at_1 |
|
value: 5.894 |
|
- type: recall_at_10 |
|
value: 16.88 |
|
- type: recall_at_100 |
|
value: 30.671 |
|
- type: recall_at_1000 |
|
value: 61.42999999999999 |
|
- type: recall_at_3 |
|
value: 11.022 |
|
- type: recall_at_5 |
|
value: 13.697999999999999 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: nq |
|
name: MTEB NQ |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 38.440999999999995 |
|
- type: map_at_10 |
|
value: 54.187 |
|
- type: map_at_100 |
|
value: 55.022000000000006 |
|
- type: map_at_1000 |
|
value: 55.044000000000004 |
|
- type: map_at_3 |
|
value: 50.174 |
|
- type: map_at_5 |
|
value: 52.61 |
|
- type: mrr_at_1 |
|
value: 42.903000000000006 |
|
- type: mrr_at_10 |
|
value: 56.699 |
|
- type: mrr_at_100 |
|
value: 57.31 |
|
- type: mrr_at_1000 |
|
value: 57.325 |
|
- type: mrr_at_3 |
|
value: 53.63099999999999 |
|
- type: mrr_at_5 |
|
value: 55.596000000000004 |
|
- type: ndcg_at_1 |
|
value: 42.903000000000006 |
|
- type: ndcg_at_10 |
|
value: 61.434 |
|
- type: ndcg_at_100 |
|
value: 64.852 |
|
- type: ndcg_at_1000 |
|
value: 65.36 |
|
- type: ndcg_at_3 |
|
value: 54.193000000000005 |
|
- type: ndcg_at_5 |
|
value: 58.15 |
|
- type: precision_at_1 |
|
value: 42.903000000000006 |
|
- type: precision_at_10 |
|
value: 9.623 |
|
- type: precision_at_100 |
|
value: 1.1560000000000001 |
|
- type: precision_at_1000 |
|
value: 0.12 |
|
- type: precision_at_3 |
|
value: 24.034 |
|
- type: precision_at_5 |
|
value: 16.779 |
|
- type: recall_at_1 |
|
value: 38.440999999999995 |
|
- type: recall_at_10 |
|
value: 80.72399999999999 |
|
- type: recall_at_100 |
|
value: 95.329 |
|
- type: recall_at_1000 |
|
value: 99.059 |
|
- type: recall_at_3 |
|
value: 62.343 |
|
- type: recall_at_5 |
|
value: 71.304 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: quora |
|
name: MTEB QuoraRetrieval |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 70.85000000000001 |
|
- type: map_at_10 |
|
value: 84.54 |
|
- type: map_at_100 |
|
value: 85.148 |
|
- type: map_at_1000 |
|
value: 85.168 |
|
- type: map_at_3 |
|
value: 81.631 |
|
- type: map_at_5 |
|
value: 83.45700000000001 |
|
- type: mrr_at_1 |
|
value: 81.58 |
|
- type: mrr_at_10 |
|
value: 87.732 |
|
- type: mrr_at_100 |
|
value: 87.825 |
|
- type: mrr_at_1000 |
|
value: 87.82600000000001 |
|
- type: mrr_at_3 |
|
value: 86.783 |
|
- type: mrr_at_5 |
|
value: 87.437 |
|
- type: ndcg_at_1 |
|
value: 81.56 |
|
- type: ndcg_at_10 |
|
value: 88.32900000000001 |
|
- type: ndcg_at_100 |
|
value: 89.513 |
|
- type: ndcg_at_1000 |
|
value: 89.63799999999999 |
|
- type: ndcg_at_3 |
|
value: 85.51100000000001 |
|
- type: ndcg_at_5 |
|
value: 87.062 |
|
- type: precision_at_1 |
|
value: 81.56 |
|
- type: precision_at_10 |
|
value: 13.349 |
|
- type: precision_at_100 |
|
value: 1.518 |
|
- type: precision_at_1000 |
|
value: 0.156 |
|
- type: precision_at_3 |
|
value: 37.293 |
|
- type: precision_at_5 |
|
value: 24.502 |
|
- type: recall_at_1 |
|
value: 70.85000000000001 |
|
- type: recall_at_10 |
|
value: 95.351 |
|
- type: recall_at_100 |
|
value: 99.405 |
|
- type: recall_at_1000 |
|
value: 99.958 |
|
- type: recall_at_3 |
|
value: 87.184 |
|
- type: recall_at_5 |
|
value: 91.625 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/reddit-clustering |
|
name: MTEB RedditClustering |
|
config: default |
|
split: test |
|
revision: 24640382cdbf8abc73003fb0fa6d111a705499eb |
|
metrics: |
|
- type: v_measure |
|
value: 56.81818576893834 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/reddit-clustering-p2p |
|
name: MTEB RedditClusteringP2P |
|
config: default |
|
split: test |
|
revision: 282350215ef01743dc01b456c7f5241fa8937f16 |
|
metrics: |
|
- type: v_measure |
|
value: 61.57033658868022 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: scidocs |
|
name: MTEB SCIDOCS |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 4.468 |
|
- type: map_at_10 |
|
value: 11.109 |
|
- type: map_at_100 |
|
value: 12.921 |
|
- type: map_at_1000 |
|
value: 13.187999999999999 |
|
- type: map_at_3 |
|
value: 8.094999999999999 |
|
- type: map_at_5 |
|
value: 9.664 |
|
- type: mrr_at_1 |
|
value: 22.1 |
|
- type: mrr_at_10 |
|
value: 32.482 |
|
- type: mrr_at_100 |
|
value: 33.558 |
|
- type: mrr_at_1000 |
|
value: 33.623999999999995 |
|
- type: mrr_at_3 |
|
value: 29.25 |
|
- type: mrr_at_5 |
|
value: 31.080000000000002 |
|
- type: ndcg_at_1 |
|
value: 22.1 |
|
- type: ndcg_at_10 |
|
value: 18.695999999999998 |
|
- type: ndcg_at_100 |
|
value: 25.749 |
|
- type: ndcg_at_1000 |
|
value: 30.711 |
|
- type: ndcg_at_3 |
|
value: 17.974 |
|
- type: ndcg_at_5 |
|
value: 15.684000000000001 |
|
- type: precision_at_1 |
|
value: 22.1 |
|
- type: precision_at_10 |
|
value: 9.56 |
|
- type: precision_at_100 |
|
value: 1.966 |
|
- type: precision_at_1000 |
|
value: 0.316 |
|
- type: precision_at_3 |
|
value: 16.667 |
|
- type: precision_at_5 |
|
value: 13.68 |
|
- type: recall_at_1 |
|
value: 4.468 |
|
- type: recall_at_10 |
|
value: 19.373 |
|
- type: recall_at_100 |
|
value: 39.853 |
|
- type: recall_at_1000 |
|
value: 64.118 |
|
- type: recall_at_3 |
|
value: 10.133000000000001 |
|
- type: recall_at_5 |
|
value: 13.877999999999998 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sickr-sts |
|
name: MTEB SICK-R |
|
config: default |
|
split: test |
|
revision: a6ea5a8cab320b040a23452cc28066d9beae2cee |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 80.11452150923512 |
|
- type: cos_sim_spearman |
|
value: 77.3007421887329 |
|
- type: euclidean_pearson |
|
value: 78.2493681078981 |
|
- type: euclidean_spearman |
|
value: 77.3007432741821 |
|
- type: manhattan_pearson |
|
value: 78.19716818242554 |
|
- type: manhattan_spearman |
|
value: 77.26439033199102 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts12-sts |
|
name: MTEB STS12 |
|
config: default |
|
split: test |
|
revision: a0d554a64d88156834ff5ae9920b964011b16384 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 82.70293570563516 |
|
- type: cos_sim_spearman |
|
value: 77.97040896962338 |
|
- type: euclidean_pearson |
|
value: 77.98827330337348 |
|
- type: euclidean_spearman |
|
value: 77.9704358930525 |
|
- type: manhattan_pearson |
|
value: 78.06991702207395 |
|
- type: manhattan_spearman |
|
value: 78.03857843100195 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts13-sts |
|
name: MTEB STS13 |
|
config: default |
|
split: test |
|
revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 77.81236960157503 |
|
- type: cos_sim_spearman |
|
value: 79.38801416063187 |
|
- type: euclidean_pearson |
|
value: 79.35003045476847 |
|
- type: euclidean_spearman |
|
value: 79.38797289536578 |
|
- type: manhattan_pearson |
|
value: 79.33155563344724 |
|
- type: manhattan_spearman |
|
value: 79.3858955436803 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts14-sts |
|
name: MTEB STS14 |
|
config: default |
|
split: test |
|
revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 77.35604880089507 |
|
- type: cos_sim_spearman |
|
value: 78.17327332594571 |
|
- type: euclidean_pearson |
|
value: 77.30302038209295 |
|
- type: euclidean_spearman |
|
value: 78.17327332594571 |
|
- type: manhattan_pearson |
|
value: 77.31323781935417 |
|
- type: manhattan_spearman |
|
value: 78.20141256686921 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts15-sts |
|
name: MTEB STS15 |
|
config: default |
|
split: test |
|
revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 84.29348597583 |
|
- type: cos_sim_spearman |
|
value: 85.50877410088334 |
|
- type: euclidean_pearson |
|
value: 85.22367284169081 |
|
- type: euclidean_spearman |
|
value: 85.50877410088334 |
|
- type: manhattan_pearson |
|
value: 85.17979979737612 |
|
- type: manhattan_spearman |
|
value: 85.46459282596254 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts16-sts |
|
name: MTEB STS16 |
|
config: default |
|
split: test |
|
revision: 4d8694f8f0e0100860b497b999b3dbed754a0513 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 83.16190794761513 |
|
- type: cos_sim_spearman |
|
value: 84.94610605287254 |
|
- type: euclidean_pearson |
|
value: 83.95587174131369 |
|
- type: euclidean_spearman |
|
value: 84.94610605287254 |
|
- type: manhattan_pearson |
|
value: 83.99025745366798 |
|
- type: manhattan_spearman |
|
value: 84.98123107148953 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts17-crosslingual-sts |
|
name: MTEB STS17 (en-en) |
|
config: en-en |
|
split: test |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 85.3047190687711 |
|
- type: cos_sim_spearman |
|
value: 85.86642469958113 |
|
- type: euclidean_pearson |
|
value: 86.74377658528041 |
|
- type: euclidean_spearman |
|
value: 85.86642469958113 |
|
- type: manhattan_pearson |
|
value: 86.56967885987439 |
|
- type: manhattan_spearman |
|
value: 85.63613272583275 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (en) |
|
config: en |
|
split: test |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 64.8298932792099 |
|
- type: cos_sim_spearman |
|
value: 64.27626667878636 |
|
- type: euclidean_pearson |
|
value: 66.01603861201576 |
|
- type: euclidean_spearman |
|
value: 64.27626667878636 |
|
- type: manhattan_pearson |
|
value: 66.31232809448106 |
|
- type: manhattan_spearman |
|
value: 64.46190921631559 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/stsbenchmark-sts |
|
name: MTEB STSBenchmark |
|
config: default |
|
split: test |
|
revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 82.73696291316243 |
|
- type: cos_sim_spearman |
|
value: 83.41508337893958 |
|
- type: euclidean_pearson |
|
value: 82.8827053024064 |
|
- type: euclidean_spearman |
|
value: 83.41508337893958 |
|
- type: manhattan_pearson |
|
value: 82.85613329045803 |
|
- type: manhattan_spearman |
|
value: 83.40522047443645 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: mteb/scidocs-reranking |
|
name: MTEB SciDocsRR |
|
config: default |
|
split: test |
|
revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab |
|
metrics: |
|
- type: map |
|
value: 75.51490079179645 |
|
- type: mrr |
|
value: 92.6809655486126 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: scifact |
|
name: MTEB SciFact |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 58.594 |
|
- type: map_at_10 |
|
value: 67.208 |
|
- type: map_at_100 |
|
value: 67.702 |
|
- type: map_at_1000 |
|
value: 67.73 |
|
- type: map_at_3 |
|
value: 64.815 |
|
- type: map_at_5 |
|
value: 65.946 |
|
- type: mrr_at_1 |
|
value: 61.667 |
|
- type: mrr_at_10 |
|
value: 68.52000000000001 |
|
- type: mrr_at_100 |
|
value: 68.888 |
|
- type: mrr_at_1000 |
|
value: 68.911 |
|
- type: mrr_at_3 |
|
value: 66.833 |
|
- type: mrr_at_5 |
|
value: 67.617 |
|
- type: ndcg_at_1 |
|
value: 61.667 |
|
- type: ndcg_at_10 |
|
value: 71.511 |
|
- type: ndcg_at_100 |
|
value: 73.765 |
|
- type: ndcg_at_1000 |
|
value: 74.40299999999999 |
|
- type: ndcg_at_3 |
|
value: 67.411 |
|
- type: ndcg_at_5 |
|
value: 68.88 |
|
- type: precision_at_1 |
|
value: 61.667 |
|
- type: precision_at_10 |
|
value: 9.433 |
|
- type: precision_at_100 |
|
value: 1.0670000000000002 |
|
- type: precision_at_1000 |
|
value: 0.11199999999999999 |
|
- type: precision_at_3 |
|
value: 26.222 |
|
- type: precision_at_5 |
|
value: 16.866999999999997 |
|
- type: recall_at_1 |
|
value: 58.594 |
|
- type: recall_at_10 |
|
value: 83.439 |
|
- type: recall_at_100 |
|
value: 94.1 |
|
- type: recall_at_1000 |
|
value: 99.0 |
|
- type: recall_at_3 |
|
value: 71.922 |
|
- type: recall_at_5 |
|
value: 75.678 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: mteb/sprintduplicatequestions-pairclassification |
|
name: MTEB SprintDuplicateQuestions |
|
config: default |
|
split: test |
|
revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 99.7990099009901 |
|
- type: cos_sim_ap |
|
value: 94.8316184070519 |
|
- type: cos_sim_f1 |
|
value: 89.75265017667844 |
|
- type: cos_sim_precision |
|
value: 90.62181447502549 |
|
- type: cos_sim_recall |
|
value: 88.9 |
|
- type: dot_accuracy |
|
value: 99.7990099009901 |
|
- type: dot_ap |
|
value: 94.831611518794 |
|
- type: dot_f1 |
|
value: 89.75265017667844 |
|
- type: dot_precision |
|
value: 90.62181447502549 |
|
- type: dot_recall |
|
value: 88.9 |
|
- type: euclidean_accuracy |
|
value: 99.7990099009901 |
|
- type: euclidean_ap |
|
value: 94.83161335144017 |
|
- type: euclidean_f1 |
|
value: 89.75265017667844 |
|
- type: euclidean_precision |
|
value: 90.62181447502549 |
|
- type: euclidean_recall |
|
value: 88.9 |
|
- type: manhattan_accuracy |
|
value: 99.8 |
|
- type: manhattan_ap |
|
value: 94.84210829841739 |
|
- type: manhattan_f1 |
|
value: 89.60905349794238 |
|
- type: manhattan_precision |
|
value: 92.26694915254238 |
|
- type: manhattan_recall |
|
value: 87.1 |
|
- type: max_accuracy |
|
value: 99.8 |
|
- type: max_ap |
|
value: 94.84210829841739 |
|
- type: max_f1 |
|
value: 89.75265017667844 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/stackexchange-clustering |
|
name: MTEB StackExchangeClustering |
|
config: default |
|
split: test |
|
revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259 |
|
metrics: |
|
- type: v_measure |
|
value: 63.18343792633894 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/stackexchange-clustering-p2p |
|
name: MTEB StackExchangeClusteringP2P |
|
config: default |
|
split: test |
|
revision: 815ca46b2622cec33ccafc3735d572c266efdb44 |
|
metrics: |
|
- type: v_measure |
|
value: 33.50944549814364 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: mteb/stackoverflowdupquestions-reranking |
|
name: MTEB StackOverflowDupQuestions |
|
config: default |
|
split: test |
|
revision: e185fbe320c72810689fc5848eb6114e1ef5ec69 |
|
metrics: |
|
- type: map |
|
value: 48.89100016028111 |
|
- type: mrr |
|
value: 49.607630931160344 |
|
- task: |
|
type: Summarization |
|
dataset: |
|
type: mteb/summeval |
|
name: MTEB SummEval |
|
config: default |
|
split: test |
|
revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 30.628145384101522 |
|
- type: cos_sim_spearman |
|
value: 31.275306930726675 |
|
- type: dot_pearson |
|
value: 30.62814883550051 |
|
- type: dot_spearman |
|
value: 31.275306930726675 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: trec-covid |
|
name: MTEB TRECCOVID |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 0.26 |
|
- type: map_at_10 |
|
value: 2.163 |
|
- type: map_at_100 |
|
value: 12.29 |
|
- type: map_at_1000 |
|
value: 29.221999999999998 |
|
- type: map_at_3 |
|
value: 0.729 |
|
- type: map_at_5 |
|
value: 1.161 |
|
- type: mrr_at_1 |
|
value: 96.0 |
|
- type: mrr_at_10 |
|
value: 98.0 |
|
- type: mrr_at_100 |
|
value: 98.0 |
|
- type: mrr_at_1000 |
|
value: 98.0 |
|
- type: mrr_at_3 |
|
value: 98.0 |
|
- type: mrr_at_5 |
|
value: 98.0 |
|
- type: ndcg_at_1 |
|
value: 89.0 |
|
- type: ndcg_at_10 |
|
value: 82.312 |
|
- type: ndcg_at_100 |
|
value: 61.971 |
|
- type: ndcg_at_1000 |
|
value: 54.065 |
|
- type: ndcg_at_3 |
|
value: 87.87700000000001 |
|
- type: ndcg_at_5 |
|
value: 85.475 |
|
- type: precision_at_1 |
|
value: 96.0 |
|
- type: precision_at_10 |
|
value: 87.4 |
|
- type: precision_at_100 |
|
value: 64.02 |
|
- type: precision_at_1000 |
|
value: 24.093999999999998 |
|
- type: precision_at_3 |
|
value: 94.0 |
|
- type: precision_at_5 |
|
value: 90.8 |
|
- type: recall_at_1 |
|
value: 0.26 |
|
- type: recall_at_10 |
|
value: 2.302 |
|
- type: recall_at_100 |
|
value: 15.148 |
|
- type: recall_at_1000 |
|
value: 50.55 |
|
- type: recall_at_3 |
|
value: 0.744 |
|
- type: recall_at_5 |
|
value: 1.198 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: webis-touche2020 |
|
name: MTEB Touche2020 |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 2.217 |
|
- type: map_at_10 |
|
value: 11.378 |
|
- type: map_at_100 |
|
value: 18.022 |
|
- type: map_at_1000 |
|
value: 19.544 |
|
- type: map_at_3 |
|
value: 6.079 |
|
- type: map_at_5 |
|
value: 8.559 |
|
- type: mrr_at_1 |
|
value: 28.571 |
|
- type: mrr_at_10 |
|
value: 48.423 |
|
- type: mrr_at_100 |
|
value: 49.028 |
|
- type: mrr_at_1000 |
|
value: 49.028 |
|
- type: mrr_at_3 |
|
value: 44.897999999999996 |
|
- type: mrr_at_5 |
|
value: 46.531 |
|
- type: ndcg_at_1 |
|
value: 25.509999999999998 |
|
- type: ndcg_at_10 |
|
value: 27.860000000000003 |
|
- type: ndcg_at_100 |
|
value: 39.34 |
|
- type: ndcg_at_1000 |
|
value: 50.21 |
|
- type: ndcg_at_3 |
|
value: 30.968 |
|
- type: ndcg_at_5 |
|
value: 29.541 |
|
- type: precision_at_1 |
|
value: 28.571 |
|
- type: precision_at_10 |
|
value: 25.918000000000003 |
|
- type: precision_at_100 |
|
value: 8.184 |
|
- type: precision_at_1000 |
|
value: 1.545 |
|
- type: precision_at_3 |
|
value: 35.374 |
|
- type: precision_at_5 |
|
value: 31.837 |
|
- type: recall_at_1 |
|
value: 2.217 |
|
- type: recall_at_10 |
|
value: 18.511 |
|
- type: recall_at_100 |
|
value: 50.178 |
|
- type: recall_at_1000 |
|
value: 83.07600000000001 |
|
- type: recall_at_3 |
|
value: 7.811999999999999 |
|
- type: recall_at_5 |
|
value: 11.684 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/toxic_conversations_50k |
|
name: MTEB ToxicConversationsClassification |
|
config: default |
|
split: test |
|
revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c |
|
metrics: |
|
- type: accuracy |
|
value: 71.386 |
|
- type: ap |
|
value: 14.58573366644018 |
|
- type: f1 |
|
value: 55.0170316975105 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/tweet_sentiment_extraction |
|
name: MTEB TweetSentimentExtractionClassification |
|
config: default |
|
split: test |
|
revision: d604517c81ca91fe16a244d1248fc021f9ecee7a |
|
metrics: |
|
- type: accuracy |
|
value: 60.868704018109796 |
|
- type: f1 |
|
value: 61.175908652496624 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mteb/twentynewsgroups-clustering |
|
name: MTEB TwentyNewsgroupsClustering |
|
config: default |
|
split: test |
|
revision: 6125ec4e24fa026cec8a478383ee943acfbd5449 |
|
metrics: |
|
- type: v_measure |
|
value: 48.72082824812323 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: mteb/twittersemeval2015-pairclassification |
|
name: MTEB TwitterSemEval2015 |
|
config: default |
|
split: test |
|
revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 85.43839780652083 |
|
- type: cos_sim_ap |
|
value: 72.55258980537292 |
|
- type: cos_sim_f1 |
|
value: 66.4145419055752 |
|
- type: cos_sim_precision |
|
value: 61.765373269798054 |
|
- type: cos_sim_recall |
|
value: 71.82058047493403 |
|
- type: dot_accuracy |
|
value: 85.43839780652083 |
|
- type: dot_ap |
|
value: 72.55256370197756 |
|
- type: dot_f1 |
|
value: 66.4145419055752 |
|
- type: dot_precision |
|
value: 61.765373269798054 |
|
- type: dot_recall |
|
value: 71.82058047493403 |
|
- type: euclidean_accuracy |
|
value: 85.43839780652083 |
|
- type: euclidean_ap |
|
value: 72.55259011957311 |
|
- type: euclidean_f1 |
|
value: 66.4145419055752 |
|
- type: euclidean_precision |
|
value: 61.765373269798054 |
|
- type: euclidean_recall |
|
value: 71.82058047493403 |
|
- type: manhattan_accuracy |
|
value: 85.40263455921799 |
|
- type: manhattan_ap |
|
value: 72.47856062032 |
|
- type: manhattan_f1 |
|
value: 66.39413249969942 |
|
- type: manhattan_precision |
|
value: 60.989617848464775 |
|
- type: manhattan_recall |
|
value: 72.84960422163589 |
|
- type: max_accuracy |
|
value: 85.43839780652083 |
|
- type: max_ap |
|
value: 72.55259011957311 |
|
- type: max_f1 |
|
value: 66.4145419055752 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: mteb/twitterurlcorpus-pairclassification |
|
name: MTEB TwitterURLCorpus |
|
config: default |
|
split: test |
|
revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 89.24981565568363 |
|
- type: cos_sim_ap |
|
value: 86.38437585690401 |
|
- type: cos_sim_f1 |
|
value: 78.79039565086076 |
|
- type: cos_sim_precision |
|
value: 77.29629629629629 |
|
- type: cos_sim_recall |
|
value: 80.34339390206344 |
|
- type: dot_accuracy |
|
value: 89.24981565568363 |
|
- type: dot_ap |
|
value: 86.38437587564587 |
|
- type: dot_f1 |
|
value: 78.79039565086076 |
|
- type: dot_precision |
|
value: 77.29629629629629 |
|
- type: dot_recall |
|
value: 80.34339390206344 |
|
- type: euclidean_accuracy |
|
value: 89.24981565568363 |
|
- type: euclidean_ap |
|
value: 86.38437691024106 |
|
- type: euclidean_f1 |
|
value: 78.79039565086076 |
|
- type: euclidean_precision |
|
value: 77.29629629629629 |
|
- type: euclidean_recall |
|
value: 80.34339390206344 |
|
- type: manhattan_accuracy |
|
value: 89.25563705514806 |
|
- type: manhattan_ap |
|
value: 86.35729146774388 |
|
- type: manhattan_f1 |
|
value: 78.7238059278837 |
|
- type: manhattan_precision |
|
value: 77.23938653034007 |
|
- type: manhattan_recall |
|
value: 80.26639975361873 |
|
- type: max_accuracy |
|
value: 89.25563705514806 |
|
- type: max_ap |
|
value: 86.38437691024106 |
|
- type: max_f1 |
|
value: 78.79039565086076 |
|
--- |
|
|
|
# nomic-embed-text-v1-ablated: A Reproducible Long Context (8192) Text Embedder |
|
|
|
`nomic-embed-text-v1-ablated` is an 8192 context length text encoder that surpasses OpenAI text-embedding-ada-002 performance on short and long tasks. |
|
|
|
|
|
|
|
| Name | SeqLen | MTEB | LoCo | Jina Long Context | Open Weights | Open Training Code | Open Data | |
|
| :-------------------------------:| :----- | :-------- | :------: | :---------------: | :-----------: | :----------------: | :---------- | |
|
| nomic-embed-text-v1 | 8192 | **62.39** |**85.53** | 54.16 | ✅ | ✅ | ✅ | |
|
| jina-embeddings-v2-base-en | 8192 | 60.39 | 85.45 | 51.90 | ✅ | ❌ | ❌ | |
|
| text-embedding-3-small | 8191 | 62.26 | 82.40 | **58.20** | ❌ | ❌ | ❌ | |
|
| text-embedding-ada-002 | 8191 | 60.99 | 52.7 | 55.25 | ❌ | ❌ | ❌ | |
|
|
|
|
|
If you would like to finetune a model on more data, you can use this model as an initialization. |
|
|
|
## Hosted Inference API |
|
|
|
The easiest way to get started with Nomic Embed is through the Nomic Embedding API. |
|
|
|
Generating embeddings with the `nomic` Python client is as easy as: |
|
|
|
```python |
|
from nomic import embed |
|
|
|
output = embed.text( |
|
texts=['Nomic Embedding API', '#keepAIOpen'], |
|
model='nomic-embed-text-v1', |
|
task_type='search_document' |
|
) |
|
|
|
print(output) |
|
``` |
|
|
|
For more information, see the [API reference](https://docs.nomic.ai/reference/endpoints/nomic-embed-text). |
|
|
|
## Data Visualization |
|
Click the Nomic Atlas map below to visualize a 5M sample of our contrastive pretraining data! |
|
|
|
|
|
[![image/webp](https://cdn-uploads.huggingface.co/production/uploads/607997c83a565c15675055b3/pjhJhuNyRfPagRd_c_iUz.webp)](https://atlas.nomic.ai/map/nomic-text-embed-v1-5m-sample) |
|
|
|
## Training Details |
|
|
|
We train our embedder using a multi-stage training pipeline. Starting from a long-context [BERT model](https://huggingface.co/nomic-ai/nomic-bert-2048), |
|
the first unsupervised contrastive stage trains on a dataset generated from weakly related text pairs, such as question-answer pairs from forums like StackExchange and Quora, title-body pairs from Amazon reviews, and summarizations from news articles. |
|
|
|
In the second finetuning stage, higher quality labeled datasets such as search queries and answers from web searches are leveraged. Data curation and hard-example mining are crucial in this stage. |
|
|
|
For more details, see the Nomic Embed [Technical Report](https://static.nomic.ai/reports/2024_Nomic_Embed_Text_Technical_Report.pdf) and corresponding [blog post](https://blog.nomic.ai/posts/nomic-embed-text-v1). |
|
|
|
The data used to train the models is released in its entirety. For more details, see the `contrastors` [repository](https://github.com/nomic-ai/contrastors). |
|
|
|
## Usage |
|
|
|
Note `nomic-embed-text` requires prefixes! We support the prefixes `[search_query, search_document, classification, clustering]`. |
|
For retrieval applications, you should prepend `search_document` for all your documents and `search_query` for your queries. |
|
|
|
### Sentence Transformers |
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1-ablated", trust_remote_code=True) |
|
sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'] |
|
embeddings = model.encode(sentences) |
|
print(embeddings) |
|
``` |
|
|
|
### Transformers |
|
|
|
```python |
|
import torch |
|
import torch.nn.functional as F |
|
from transformers import AutoTokenizer, AutoModel |
|
|
|
def mean_pooling(model_output, attention_mask): |
|
token_embeddings = model_output[0] |
|
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() |
|
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) |
|
|
|
sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'] |
|
|
|
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') |
|
model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1-ablated', trust_remote_code=True) |
|
model.eval() |
|
|
|
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt') |
|
|
|
with torch.no_grad(): |
|
model_output = model(**encoded_input) |
|
|
|
embeddings = mean_pooling(model_output, encoded_input['attention_mask']) |
|
embeddings = F.normalize(embeddings, p=2, dim=1) |
|
print(embeddings) |
|
``` |
|
|
|
The model natively supports scaling of the sequence length past 2048 tokens. To do so, make the following changes: |
|
|
|
```diff |
|
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') |
|
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192) |
|
|
|
|
|
- model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1-ablated', trust_remote_code=True) |
|
+ model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1-ablated', trust_remote_code=True, rotary_scaling_factor=2) |
|
``` |
|
|
|
# Join the Nomic Community |
|
|
|
- Nomic: [https://nomic.ai](https://nomic.ai) |
|
- Discord: [https://discord.gg/myY5YDR8z8](https://discord.gg/myY5YDR8z8) |
|
- Twitter: [https://twitter.com/nomic_ai](https://twitter.com/nomic_ai) |
|
|