---
tags:
- mteb
- transformers.js
- transformers
model-index:
- name: mxbai-embed-2d-large-v1
results:
- task:
type: Classification
dataset:
type: mteb/amazon_counterfactual
name: MTEB AmazonCounterfactualClassification (en)
config: en
split: test
revision: e8379541af4e31359cca9fbcf4b00f2671dba205
metrics:
- type: accuracy
value: 74.76119402985074
- type: ap
value: 37.90611182084586
- type: f1
value: 68.80795400445113
- task:
type: Classification
dataset:
type: mteb/amazon_polarity
name: MTEB AmazonPolarityClassification
config: default
split: test
revision: e2d317d38cd51312af73b3d32a06d1a08b442046
metrics:
- type: accuracy
value: 93.255525
- type: ap
value: 90.06886124154308
- type: f1
value: 93.24785420201029
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (en)
config: en
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 46.162000000000006
- type: f1
value: 45.66989189593428
- task:
type: Retrieval
dataset:
type: arguana
name: MTEB ArguAna
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 37.980000000000004
- type: map_at_10
value: 54.918
- type: map_at_100
value: 55.401
- type: map_at_1000
value: 55.403000000000006
- type: map_at_3
value: 50.249
- type: map_at_5
value: 53.400000000000006
- type: mrr_at_1
value: 38.834
- type: mrr_at_10
value: 55.24
- type: mrr_at_100
value: 55.737
- type: mrr_at_1000
value: 55.738
- type: mrr_at_3
value: 50.580999999999996
- type: mrr_at_5
value: 53.71
- type: ndcg_at_1
value: 37.980000000000004
- type: ndcg_at_10
value: 63.629000000000005
- type: ndcg_at_100
value: 65.567
- type: ndcg_at_1000
value: 65.61399999999999
- type: ndcg_at_3
value: 54.275
- type: ndcg_at_5
value: 59.91
- type: precision_at_1
value: 37.980000000000004
- type: precision_at_10
value: 9.110999999999999
- type: precision_at_100
value: 0.993
- type: precision_at_1000
value: 0.1
- type: precision_at_3
value: 21.977
- type: precision_at_5
value: 15.903
- type: recall_at_1
value: 37.980000000000004
- type: recall_at_10
value: 91.11
- type: recall_at_100
value: 99.289
- type: recall_at_1000
value: 99.644
- type: recall_at_3
value: 65.932
- type: recall_at_5
value: 79.51599999999999
- task:
type: Clustering
dataset:
type: mteb/arxiv-clustering-p2p
name: MTEB ArxivClusteringP2P
config: default
split: test
revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
metrics:
- type: v_measure
value: 48.28746486562395
- task:
type: Clustering
dataset:
type: mteb/arxiv-clustering-s2s
name: MTEB ArxivClusteringS2S
config: default
split: test
revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
metrics:
- type: v_measure
value: 42.335244985544165
- task:
type: Reranking
dataset:
type: mteb/askubuntudupquestions-reranking
name: MTEB AskUbuntuDupQuestions
config: default
split: test
revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
metrics:
- type: map
value: 63.771155681602096
- type: mrr
value: 76.55993052807459
- task:
type: STS
dataset:
type: mteb/biosses-sts
name: MTEB BIOSSES
config: default
split: test
revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
metrics:
- type: cos_sim_pearson
value: 89.76152904846916
- type: cos_sim_spearman
value: 88.05622328825284
- type: euclidean_pearson
value: 88.2821986323439
- type: euclidean_spearman
value: 88.05622328825284
- type: manhattan_pearson
value: 87.98419111117559
- type: manhattan_spearman
value: 87.905617446958
- task:
type: Classification
dataset:
type: mteb/banking77
name: MTEB Banking77Classification
config: default
split: test
revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
metrics:
- type: accuracy
value: 86.65259740259741
- type: f1
value: 86.62044951853902
- task:
type: Clustering
dataset:
type: mteb/biorxiv-clustering-p2p
name: MTEB BiorxivClusteringP2P
config: default
split: test
revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
metrics:
- type: v_measure
value: 39.7270855384167
- task:
type: Clustering
dataset:
type: mteb/biorxiv-clustering-s2s
name: MTEB BiorxivClusteringS2S
config: default
split: test
revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
metrics:
- type: v_measure
value: 36.95365397158872
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackAndroidRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 30.604
- type: map_at_10
value: 42.126999999999995
- type: map_at_100
value: 43.702999999999996
- type: map_at_1000
value: 43.851
- type: map_at_3
value: 38.663
- type: map_at_5
value: 40.67
- type: mrr_at_1
value: 37.625
- type: mrr_at_10
value: 48.203
- type: mrr_at_100
value: 48.925000000000004
- type: mrr_at_1000
value: 48.979
- type: mrr_at_3
value: 45.494
- type: mrr_at_5
value: 47.288999999999994
- type: ndcg_at_1
value: 37.625
- type: ndcg_at_10
value: 48.649
- type: ndcg_at_100
value: 54.041
- type: ndcg_at_1000
value: 56.233999999999995
- type: ndcg_at_3
value: 43.704
- type: ndcg_at_5
value: 46.172999999999995
- type: precision_at_1
value: 37.625
- type: precision_at_10
value: 9.371
- type: precision_at_100
value: 1.545
- type: precision_at_1000
value: 0.20400000000000001
- type: precision_at_3
value: 21.364
- type: precision_at_5
value: 15.421999999999999
- type: recall_at_1
value: 30.604
- type: recall_at_10
value: 60.94199999999999
- type: recall_at_100
value: 82.893
- type: recall_at_1000
value: 96.887
- type: recall_at_3
value: 46.346
- type: recall_at_5
value: 53.495000000000005
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackEnglishRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 29.959000000000003
- type: map_at_10
value: 40.217999999999996
- type: map_at_100
value: 41.337
- type: map_at_1000
value: 41.471999999999994
- type: map_at_3
value: 37.029
- type: map_at_5
value: 38.873000000000005
- type: mrr_at_1
value: 37.325
- type: mrr_at_10
value: 45.637
- type: mrr_at_100
value: 46.243
- type: mrr_at_1000
value: 46.297
- type: mrr_at_3
value: 43.323
- type: mrr_at_5
value: 44.734
- type: ndcg_at_1
value: 37.325
- type: ndcg_at_10
value: 45.864
- type: ndcg_at_100
value: 49.832
- type: ndcg_at_1000
value: 52.056000000000004
- type: ndcg_at_3
value: 41.329
- type: ndcg_at_5
value: 43.547000000000004
- type: precision_at_1
value: 37.325
- type: precision_at_10
value: 8.732
- type: precision_at_100
value: 1.369
- type: precision_at_1000
value: 0.185
- type: precision_at_3
value: 19.936
- type: precision_at_5
value: 14.306
- type: recall_at_1
value: 29.959000000000003
- type: recall_at_10
value: 56.113
- type: recall_at_100
value: 73.231
- type: recall_at_1000
value: 87.373
- type: recall_at_3
value: 42.88
- type: recall_at_5
value: 49.004
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGamingRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 38.679
- type: map_at_10
value: 50.696
- type: map_at_100
value: 51.788000000000004
- type: map_at_1000
value: 51.849999999999994
- type: map_at_3
value: 47.414
- type: map_at_5
value: 49.284
- type: mrr_at_1
value: 44.263000000000005
- type: mrr_at_10
value: 54.03
- type: mrr_at_100
value: 54.752
- type: mrr_at_1000
value: 54.784
- type: mrr_at_3
value: 51.661
- type: mrr_at_5
value: 53.047
- type: ndcg_at_1
value: 44.263000000000005
- type: ndcg_at_10
value: 56.452999999999996
- type: ndcg_at_100
value: 60.736999999999995
- type: ndcg_at_1000
value: 61.982000000000006
- type: ndcg_at_3
value: 51.085
- type: ndcg_at_5
value: 53.715999999999994
- type: precision_at_1
value: 44.263000000000005
- type: precision_at_10
value: 9.129
- type: precision_at_100
value: 1.218
- type: precision_at_1000
value: 0.13699999999999998
- type: precision_at_3
value: 22.8
- type: precision_at_5
value: 15.674
- type: recall_at_1
value: 38.679
- type: recall_at_10
value: 70.1
- type: recall_at_100
value: 88.649
- type: recall_at_1000
value: 97.48
- type: recall_at_3
value: 55.757999999999996
- type: recall_at_5
value: 62.244
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGisRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 25.796999999999997
- type: map_at_10
value: 34.011
- type: map_at_100
value: 35.103
- type: map_at_1000
value: 35.187000000000005
- type: map_at_3
value: 31.218
- type: map_at_5
value: 32.801
- type: mrr_at_1
value: 28.022999999999996
- type: mrr_at_10
value: 36.108000000000004
- type: mrr_at_100
value: 37.094
- type: mrr_at_1000
value: 37.158
- type: mrr_at_3
value: 33.635
- type: mrr_at_5
value: 35.081
- type: ndcg_at_1
value: 28.022999999999996
- type: ndcg_at_10
value: 38.887
- type: ndcg_at_100
value: 44.159
- type: ndcg_at_1000
value: 46.300000000000004
- type: ndcg_at_3
value: 33.623
- type: ndcg_at_5
value: 36.281
- type: precision_at_1
value: 28.022999999999996
- type: precision_at_10
value: 6.010999999999999
- type: precision_at_100
value: 0.901
- type: precision_at_1000
value: 0.11299999999999999
- type: precision_at_3
value: 14.124
- type: precision_at_5
value: 10.034
- type: recall_at_1
value: 25.796999999999997
- type: recall_at_10
value: 51.86300000000001
- type: recall_at_100
value: 75.995
- type: recall_at_1000
value: 91.93299999999999
- type: recall_at_3
value: 37.882
- type: recall_at_5
value: 44.34
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackMathematicaRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 15.468000000000002
- type: map_at_10
value: 24.026
- type: map_at_100
value: 25.237
- type: map_at_1000
value: 25.380000000000003
- type: map_at_3
value: 21.342
- type: map_at_5
value: 22.843
- type: mrr_at_1
value: 19.154
- type: mrr_at_10
value: 28.429
- type: mrr_at_100
value: 29.416999999999998
- type: mrr_at_1000
value: 29.491
- type: mrr_at_3
value: 25.746000000000002
- type: mrr_at_5
value: 27.282
- type: ndcg_at_1
value: 19.154
- type: ndcg_at_10
value: 29.512
- type: ndcg_at_100
value: 35.331
- type: ndcg_at_1000
value: 38.435
- type: ndcg_at_3
value: 24.566
- type: ndcg_at_5
value: 26.891
- type: precision_at_1
value: 19.154
- type: precision_at_10
value: 5.647
- type: precision_at_100
value: 0.984
- type: precision_at_1000
value: 0.13899999999999998
- type: precision_at_3
value: 12.065
- type: precision_at_5
value: 8.98
- type: recall_at_1
value: 15.468000000000002
- type: recall_at_10
value: 41.908
- type: recall_at_100
value: 67.17
- type: recall_at_1000
value: 89.05499999999999
- type: recall_at_3
value: 28.436
- type: recall_at_5
value: 34.278
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackPhysicsRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 28.116000000000003
- type: map_at_10
value: 39.034
- type: map_at_100
value: 40.461000000000006
- type: map_at_1000
value: 40.563
- type: map_at_3
value: 35.742000000000004
- type: map_at_5
value: 37.762
- type: mrr_at_1
value: 34.264
- type: mrr_at_10
value: 44.173
- type: mrr_at_100
value: 45.111000000000004
- type: mrr_at_1000
value: 45.149
- type: mrr_at_3
value: 41.626999999999995
- type: mrr_at_5
value: 43.234
- type: ndcg_at_1
value: 34.264
- type: ndcg_at_10
value: 45.011
- type: ndcg_at_100
value: 50.91
- type: ndcg_at_1000
value: 52.886
- type: ndcg_at_3
value: 39.757999999999996
- type: ndcg_at_5
value: 42.569
- type: precision_at_1
value: 34.264
- type: precision_at_10
value: 8.114
- type: precision_at_100
value: 1.2890000000000001
- type: precision_at_1000
value: 0.163
- type: precision_at_3
value: 18.864
- type: precision_at_5
value: 13.628000000000002
- type: recall_at_1
value: 28.116000000000003
- type: recall_at_10
value: 57.764
- type: recall_at_100
value: 82.393
- type: recall_at_1000
value: 95.345
- type: recall_at_3
value: 43.35
- type: recall_at_5
value: 50.368
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackProgrammersRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 23.557
- type: map_at_10
value: 33.94
- type: map_at_100
value: 35.382000000000005
- type: map_at_1000
value: 35.497
- type: map_at_3
value: 30.635
- type: map_at_5
value: 32.372
- type: mrr_at_1
value: 29.224
- type: mrr_at_10
value: 39.017
- type: mrr_at_100
value: 39.908
- type: mrr_at_1000
value: 39.96
- type: mrr_at_3
value: 36.225
- type: mrr_at_5
value: 37.869
- type: ndcg_at_1
value: 29.224
- type: ndcg_at_10
value: 40.097
- type: ndcg_at_100
value: 46.058
- type: ndcg_at_1000
value: 48.309999999999995
- type: ndcg_at_3
value: 34.551
- type: ndcg_at_5
value: 36.937
- type: precision_at_1
value: 29.224
- type: precision_at_10
value: 7.6259999999999994
- type: precision_at_100
value: 1.226
- type: precision_at_1000
value: 0.161
- type: precision_at_3
value: 16.781
- type: precision_at_5
value: 12.26
- type: recall_at_1
value: 23.557
- type: recall_at_10
value: 53.46300000000001
- type: recall_at_100
value: 78.797
- type: recall_at_1000
value: 93.743
- type: recall_at_3
value: 37.95
- type: recall_at_5
value: 44.121
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 24.81583333333333
- type: map_at_10
value: 34.057833333333335
- type: map_at_100
value: 35.29658333333334
- type: map_at_1000
value: 35.418666666666674
- type: map_at_3
value: 31.16416666666667
- type: map_at_5
value: 32.797
- type: mrr_at_1
value: 29.40216666666667
- type: mrr_at_10
value: 38.11191666666667
- type: mrr_at_100
value: 38.983250000000005
- type: mrr_at_1000
value: 39.043
- type: mrr_at_3
value: 35.663333333333334
- type: mrr_at_5
value: 37.08975
- type: ndcg_at_1
value: 29.40216666666667
- type: ndcg_at_10
value: 39.462416666666655
- type: ndcg_at_100
value: 44.74341666666666
- type: ndcg_at_1000
value: 47.12283333333333
- type: ndcg_at_3
value: 34.57383333333334
- type: ndcg_at_5
value: 36.91816666666667
- type: precision_at_1
value: 29.40216666666667
- type: precision_at_10
value: 7.008416666666667
- type: precision_at_100
value: 1.143333333333333
- type: precision_at_1000
value: 0.15391666666666665
- type: precision_at_3
value: 16.011083333333335
- type: precision_at_5
value: 11.506666666666664
- type: recall_at_1
value: 24.81583333333333
- type: recall_at_10
value: 51.39391666666666
- type: recall_at_100
value: 74.52983333333333
- type: recall_at_1000
value: 91.00650000000002
- type: recall_at_3
value: 37.87458333333334
- type: recall_at_5
value: 43.865833333333335
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackStatsRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 24.04
- type: map_at_10
value: 30.651
- type: map_at_100
value: 31.561
- type: map_at_1000
value: 31.667
- type: map_at_3
value: 28.358
- type: map_at_5
value: 29.644
- type: mrr_at_1
value: 26.840000000000003
- type: mrr_at_10
value: 33.397
- type: mrr_at_100
value: 34.166999999999994
- type: mrr_at_1000
value: 34.252
- type: mrr_at_3
value: 31.339
- type: mrr_at_5
value: 32.451
- type: ndcg_at_1
value: 26.840000000000003
- type: ndcg_at_10
value: 34.821999999999996
- type: ndcg_at_100
value: 39.155
- type: ndcg_at_1000
value: 41.837999999999994
- type: ndcg_at_3
value: 30.55
- type: ndcg_at_5
value: 32.588
- type: precision_at_1
value: 26.840000000000003
- type: precision_at_10
value: 5.383
- type: precision_at_100
value: 0.827
- type: precision_at_1000
value: 0.11199999999999999
- type: precision_at_3
value: 12.986
- type: precision_at_5
value: 9.11
- type: recall_at_1
value: 24.04
- type: recall_at_10
value: 45.133
- type: recall_at_100
value: 64.519
- type: recall_at_1000
value: 84.397
- type: recall_at_3
value: 33.465
- type: recall_at_5
value: 38.504
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackTexRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 15.744
- type: map_at_10
value: 22.557
- type: map_at_100
value: 23.705000000000002
- type: map_at_1000
value: 23.833
- type: map_at_3
value: 20.342
- type: map_at_5
value: 21.584
- type: mrr_at_1
value: 19.133
- type: mrr_at_10
value: 26.316
- type: mrr_at_100
value: 27.285999999999998
- type: mrr_at_1000
value: 27.367
- type: mrr_at_3
value: 24.214
- type: mrr_at_5
value: 25.419999999999998
- type: ndcg_at_1
value: 19.133
- type: ndcg_at_10
value: 27.002
- type: ndcg_at_100
value: 32.544000000000004
- type: ndcg_at_1000
value: 35.624
- type: ndcg_at_3
value: 23.015
- type: ndcg_at_5
value: 24.916
- type: precision_at_1
value: 19.133
- type: precision_at_10
value: 4.952
- type: precision_at_100
value: 0.918
- type: precision_at_1000
value: 0.136
- type: precision_at_3
value: 10.908
- type: precision_at_5
value: 8.004
- type: recall_at_1
value: 15.744
- type: recall_at_10
value: 36.63
- type: recall_at_100
value: 61.58
- type: recall_at_1000
value: 83.648
- type: recall_at_3
value: 25.545
- type: recall_at_5
value: 30.392000000000003
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackUnixRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 24.944
- type: map_at_10
value: 33.611000000000004
- type: map_at_100
value: 34.737
- type: map_at_1000
value: 34.847
- type: map_at_3
value: 30.746000000000002
- type: map_at_5
value: 32.357
- type: mrr_at_1
value: 29.198
- type: mrr_at_10
value: 37.632
- type: mrr_at_100
value: 38.53
- type: mrr_at_1000
value: 38.59
- type: mrr_at_3
value: 35.292
- type: mrr_at_5
value: 36.519
- type: ndcg_at_1
value: 29.198
- type: ndcg_at_10
value: 38.946999999999996
- type: ndcg_at_100
value: 44.348
- type: ndcg_at_1000
value: 46.787
- type: ndcg_at_3
value: 33.794999999999995
- type: ndcg_at_5
value: 36.166
- type: precision_at_1
value: 29.198
- type: precision_at_10
value: 6.595
- type: precision_at_100
value: 1.055
- type: precision_at_1000
value: 0.13899999999999998
- type: precision_at_3
value: 15.235999999999999
- type: precision_at_5
value: 10.896
- type: recall_at_1
value: 24.944
- type: recall_at_10
value: 51.284
- type: recall_at_100
value: 75.197
- type: recall_at_1000
value: 92.10000000000001
- type: recall_at_3
value: 37.213
- type: recall_at_5
value: 43.129
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackWebmastersRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 21.979000000000003
- type: map_at_10
value: 31.349
- type: map_at_100
value: 32.969
- type: map_at_1000
value: 33.2
- type: map_at_3
value: 28.237000000000002
- type: map_at_5
value: 30.09
- type: mrr_at_1
value: 27.075
- type: mrr_at_10
value: 35.946
- type: mrr_at_100
value: 36.897000000000006
- type: mrr_at_1000
value: 36.951
- type: mrr_at_3
value: 32.971000000000004
- type: mrr_at_5
value: 34.868
- type: ndcg_at_1
value: 27.075
- type: ndcg_at_10
value: 37.317
- type: ndcg_at_100
value: 43.448
- type: ndcg_at_1000
value: 45.940999999999995
- type: ndcg_at_3
value: 32.263
- type: ndcg_at_5
value: 34.981
- type: precision_at_1
value: 27.075
- type: precision_at_10
value: 7.568999999999999
- type: precision_at_100
value: 1.5650000000000002
- type: precision_at_1000
value: 0.241
- type: precision_at_3
value: 15.547
- type: precision_at_5
value: 11.818
- type: recall_at_1
value: 21.979000000000003
- type: recall_at_10
value: 48.522999999999996
- type: recall_at_100
value: 76.51
- type: recall_at_1000
value: 92.168
- type: recall_at_3
value: 34.499
- type: recall_at_5
value: 41.443999999999996
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackWordpressRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 18.903
- type: map_at_10
value: 26.473999999999997
- type: map_at_100
value: 27.576
- type: map_at_1000
value: 27.677000000000003
- type: map_at_3
value: 24.244
- type: map_at_5
value: 25.284000000000002
- type: mrr_at_1
value: 20.702
- type: mrr_at_10
value: 28.455000000000002
- type: mrr_at_100
value: 29.469
- type: mrr_at_1000
value: 29.537999999999997
- type: mrr_at_3
value: 26.433
- type: mrr_at_5
value: 27.283
- type: ndcg_at_1
value: 20.702
- type: ndcg_at_10
value: 30.988
- type: ndcg_at_100
value: 36.358000000000004
- type: ndcg_at_1000
value: 39.080999999999996
- type: ndcg_at_3
value: 26.647
- type: ndcg_at_5
value: 28.253
- type: precision_at_1
value: 20.702
- type: precision_at_10
value: 4.972
- type: precision_at_100
value: 0.823
- type: precision_at_1000
value: 0.117
- type: precision_at_3
value: 11.522
- type: precision_at_5
value: 7.9479999999999995
- type: recall_at_1
value: 18.903
- type: recall_at_10
value: 43.004
- type: recall_at_100
value: 67.42399999999999
- type: recall_at_1000
value: 87.949
- type: recall_at_3
value: 31.171
- type: recall_at_5
value: 35.071000000000005
- task:
type: Retrieval
dataset:
type: climate-fever
name: MTEB ClimateFEVER
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 12.942
- type: map_at_10
value: 22.017999999999997
- type: map_at_100
value: 23.968
- type: map_at_1000
value: 24.169
- type: map_at_3
value: 18.282
- type: map_at_5
value: 20.191
- type: mrr_at_1
value: 29.121000000000002
- type: mrr_at_10
value: 40.897
- type: mrr_at_100
value: 41.787
- type: mrr_at_1000
value: 41.819
- type: mrr_at_3
value: 37.535000000000004
- type: mrr_at_5
value: 39.626
- type: ndcg_at_1
value: 29.121000000000002
- type: ndcg_at_10
value: 30.728
- type: ndcg_at_100
value: 38.231
- type: ndcg_at_1000
value: 41.735
- type: ndcg_at_3
value: 25.141000000000002
- type: ndcg_at_5
value: 27.093
- type: precision_at_1
value: 29.121000000000002
- type: precision_at_10
value: 9.674000000000001
- type: precision_at_100
value: 1.775
- type: precision_at_1000
value: 0.243
- type: precision_at_3
value: 18.826999999999998
- type: precision_at_5
value: 14.515
- type: recall_at_1
value: 12.942
- type: recall_at_10
value: 36.692
- type: recall_at_100
value: 62.688
- type: recall_at_1000
value: 82.203
- type: recall_at_3
value: 22.820999999999998
- type: recall_at_5
value: 28.625
- task:
type: Retrieval
dataset:
type: dbpedia-entity
name: MTEB DBPedia
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 8.6
- type: map_at_10
value: 18.672
- type: map_at_100
value: 27.199
- type: map_at_1000
value: 29.032999999999998
- type: map_at_3
value: 13.045000000000002
- type: map_at_5
value: 15.271
- type: mrr_at_1
value: 69
- type: mrr_at_10
value: 75.304
- type: mrr_at_100
value: 75.68
- type: mrr_at_1000
value: 75.688
- type: mrr_at_3
value: 73.708
- type: mrr_at_5
value: 74.333
- type: ndcg_at_1
value: 56.25
- type: ndcg_at_10
value: 40.741
- type: ndcg_at_100
value: 45.933
- type: ndcg_at_1000
value: 53.764
- type: ndcg_at_3
value: 44.664
- type: ndcg_at_5
value: 42.104
- type: precision_at_1
value: 69
- type: precision_at_10
value: 33
- type: precision_at_100
value: 10.75
- type: precision_at_1000
value: 2.1999999999999997
- type: precision_at_3
value: 48.167
- type: precision_at_5
value: 41.099999999999994
- type: recall_at_1
value: 8.6
- type: recall_at_10
value: 24.447
- type: recall_at_100
value: 52.697
- type: recall_at_1000
value: 77.717
- type: recall_at_3
value: 14.13
- type: recall_at_5
value: 17.485999999999997
- task:
type: Classification
dataset:
type: mteb/emotion
name: MTEB EmotionClassification
config: default
split: test
revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
metrics:
- type: accuracy
value: 49.32
- type: f1
value: 43.92815810776849
- task:
type: Retrieval
dataset:
type: fever
name: MTEB FEVER
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 68.987
- type: map_at_10
value: 78.025
- type: map_at_100
value: 78.28500000000001
- type: map_at_1000
value: 78.3
- type: map_at_3
value: 76.735
- type: map_at_5
value: 77.558
- type: mrr_at_1
value: 74.482
- type: mrr_at_10
value: 82.673
- type: mrr_at_100
value: 82.799
- type: mrr_at_1000
value: 82.804
- type: mrr_at_3
value: 81.661
- type: mrr_at_5
value: 82.369
- type: ndcg_at_1
value: 74.482
- type: ndcg_at_10
value: 82.238
- type: ndcg_at_100
value: 83.245
- type: ndcg_at_1000
value: 83.557
- type: ndcg_at_3
value: 80.066
- type: ndcg_at_5
value: 81.316
- type: precision_at_1
value: 74.482
- type: precision_at_10
value: 10.006
- type: precision_at_100
value: 1.0699999999999998
- type: precision_at_1000
value: 0.11100000000000002
- type: precision_at_3
value: 30.808000000000003
- type: precision_at_5
value: 19.256
- type: recall_at_1
value: 68.987
- type: recall_at_10
value: 90.646
- type: recall_at_100
value: 94.85900000000001
- type: recall_at_1000
value: 96.979
- type: recall_at_3
value: 84.76599999999999
- type: recall_at_5
value: 87.929
- task:
type: Retrieval
dataset:
type: fiqa
name: MTEB FiQA2018
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 20.3
- type: map_at_10
value: 33.499
- type: map_at_100
value: 35.510000000000005
- type: map_at_1000
value: 35.693999999999996
- type: map_at_3
value: 29.083
- type: map_at_5
value: 31.367
- type: mrr_at_1
value: 39.660000000000004
- type: mrr_at_10
value: 49.517
- type: mrr_at_100
value: 50.18899999999999
- type: mrr_at_1000
value: 50.224000000000004
- type: mrr_at_3
value: 46.965
- type: mrr_at_5
value: 48.184
- type: ndcg_at_1
value: 39.660000000000004
- type: ndcg_at_10
value: 41.75
- type: ndcg_at_100
value: 48.477
- type: ndcg_at_1000
value: 51.373999999999995
- type: ndcg_at_3
value: 37.532
- type: ndcg_at_5
value: 38.564
- type: precision_at_1
value: 39.660000000000004
- type: precision_at_10
value: 11.774999999999999
- type: precision_at_100
value: 1.883
- type: precision_at_1000
value: 0.23900000000000002
- type: precision_at_3
value: 25.102999999999998
- type: precision_at_5
value: 18.395
- type: recall_at_1
value: 20.3
- type: recall_at_10
value: 49.633
- type: recall_at_100
value: 73.932
- type: recall_at_1000
value: 91.174
- type: recall_at_3
value: 34.516999999999996
- type: recall_at_5
value: 40.217000000000006
- task:
type: Retrieval
dataset:
type: hotpotqa
name: MTEB HotpotQA
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 34.699999999999996
- type: map_at_10
value: 54.400000000000006
- type: map_at_100
value: 55.45
- type: map_at_1000
value: 55.525999999999996
- type: map_at_3
value: 50.99
- type: map_at_5
value: 53.054
- type: mrr_at_1
value: 69.399
- type: mrr_at_10
value: 76.454
- type: mrr_at_100
value: 76.771
- type: mrr_at_1000
value: 76.783
- type: mrr_at_3
value: 75.179
- type: mrr_at_5
value: 75.978
- type: ndcg_at_1
value: 69.399
- type: ndcg_at_10
value: 63.001
- type: ndcg_at_100
value: 66.842
- type: ndcg_at_1000
value: 68.33500000000001
- type: ndcg_at_3
value: 57.961
- type: ndcg_at_5
value: 60.67700000000001
- type: precision_at_1
value: 69.399
- type: precision_at_10
value: 13.4
- type: precision_at_100
value: 1.6420000000000001
- type: precision_at_1000
value: 0.184
- type: precision_at_3
value: 37.218
- type: precision_at_5
value: 24.478
- type: recall_at_1
value: 34.699999999999996
- type: recall_at_10
value: 67.002
- type: recall_at_100
value: 82.113
- type: recall_at_1000
value: 91.945
- type: recall_at_3
value: 55.827000000000005
- type: recall_at_5
value: 61.195
- task:
type: Classification
dataset:
type: mteb/imdb
name: MTEB ImdbClassification
config: default
split: test
revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
metrics:
- type: accuracy
value: 90.40480000000001
- type: ap
value: 86.34472513785936
- type: f1
value: 90.3766943422773
- task:
type: Retrieval
dataset:
type: msmarco
name: MTEB MSMARCO
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 19.796
- type: map_at_10
value: 31.344
- type: map_at_100
value: 32.525999999999996
- type: map_at_1000
value: 32.582
- type: map_at_3
value: 27.514
- type: map_at_5
value: 29.683
- type: mrr_at_1
value: 20.358
- type: mrr_at_10
value: 31.924999999999997
- type: mrr_at_100
value: 33.056000000000004
- type: mrr_at_1000
value: 33.105000000000004
- type: mrr_at_3
value: 28.149
- type: mrr_at_5
value: 30.303
- type: ndcg_at_1
value: 20.372
- type: ndcg_at_10
value: 38.025999999999996
- type: ndcg_at_100
value: 43.813
- type: ndcg_at_1000
value: 45.21
- type: ndcg_at_3
value: 30.218
- type: ndcg_at_5
value: 34.088
- type: precision_at_1
value: 20.372
- type: precision_at_10
value: 6.123
- type: precision_at_100
value: 0.903
- type: precision_at_1000
value: 0.10200000000000001
- type: precision_at_3
value: 12.918
- type: precision_at_5
value: 9.702
- type: recall_at_1
value: 19.796
- type: recall_at_10
value: 58.644
- type: recall_at_100
value: 85.611
- type: recall_at_1000
value: 96.314
- type: recall_at_3
value: 37.419999999999995
- type: recall_at_5
value: 46.697
- task:
type: Classification
dataset:
type: mteb/mtop_domain
name: MTEB MTOPDomainClassification (en)
config: en
split: test
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
metrics:
- type: accuracy
value: 93.0984952120383
- type: f1
value: 92.9409029889071
- task:
type: Classification
dataset:
type: mteb/mtop_intent
name: MTEB MTOPIntentClassification (en)
config: en
split: test
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
metrics:
- type: accuracy
value: 73.24441404468764
- type: f1
value: 54.66568676132254
- task:
type: Classification
dataset:
type: mteb/amazon_massive_intent
name: MTEB MassiveIntentClassification (en)
config: en
split: test
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
metrics:
- type: accuracy
value: 73.86684599865501
- type: f1
value: 72.16086061041996
- task:
type: Classification
dataset:
type: mteb/amazon_massive_scenario
name: MTEB MassiveScenarioClassification (en)
config: en
split: test
revision: 7d571f92784cd94a019292a1f45445077d0ef634
metrics:
- type: accuracy
value: 78.16745124411568
- type: f1
value: 78.76361933295068
- task:
type: Clustering
dataset:
type: mteb/medrxiv-clustering-p2p
name: MTEB MedrxivClusteringP2P
config: default
split: test
revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
metrics:
- type: v_measure
value: 33.66329421728342
- task:
type: Clustering
dataset:
type: mteb/medrxiv-clustering-s2s
name: MTEB MedrxivClusteringS2S
config: default
split: test
revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
metrics:
- type: v_measure
value: 32.21637418682758
- task:
type: Reranking
dataset:
type: mteb/mind_small
name: MTEB MindSmallReranking
config: default
split: test
revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
metrics:
- type: map
value: 31.85308363141191
- type: mrr
value: 33.06713899953772
- task:
type: Retrieval
dataset:
type: nfcorpus
name: MTEB NFCorpus
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 6.392
- type: map_at_10
value: 14.539
- type: map_at_100
value: 18.811
- type: map_at_1000
value: 20.471
- type: map_at_3
value: 10.26
- type: map_at_5
value: 12.224
- type: mrr_at_1
value: 46.749
- type: mrr_at_10
value: 55.72200000000001
- type: mrr_at_100
value: 56.325
- type: mrr_at_1000
value: 56.35
- type: mrr_at_3
value: 53.30200000000001
- type: mrr_at_5
value: 54.742000000000004
- type: ndcg_at_1
value: 44.891999999999996
- type: ndcg_at_10
value: 37.355
- type: ndcg_at_100
value: 35.285
- type: ndcg_at_1000
value: 44.246
- type: ndcg_at_3
value: 41.291
- type: ndcg_at_5
value: 39.952
- type: precision_at_1
value: 46.749
- type: precision_at_10
value: 28.111000000000004
- type: precision_at_100
value: 9.127
- type: precision_at_1000
value: 2.23
- type: precision_at_3
value: 38.803
- type: precision_at_5
value: 35.046
- type: recall_at_1
value: 6.392
- type: recall_at_10
value: 19.066
- type: recall_at_100
value: 37.105
- type: recall_at_1000
value: 69.37299999999999
- type: recall_at_3
value: 11.213
- type: recall_at_5
value: 14.648
- task:
type: Retrieval
dataset:
type: nq
name: MTEB NQ
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 31.387999999999998
- type: map_at_10
value: 47.172
- type: map_at_100
value: 48.158
- type: map_at_1000
value: 48.186
- type: map_at_3
value: 42.952
- type: map_at_5
value: 45.405
- type: mrr_at_1
value: 35.458
- type: mrr_at_10
value: 49.583
- type: mrr_at_100
value: 50.324999999999996
- type: mrr_at_1000
value: 50.344
- type: mrr_at_3
value: 46.195
- type: mrr_at_5
value: 48.258
- type: ndcg_at_1
value: 35.458
- type: ndcg_at_10
value: 54.839000000000006
- type: ndcg_at_100
value: 58.974000000000004
- type: ndcg_at_1000
value: 59.64699999999999
- type: ndcg_at_3
value: 47.012
- type: ndcg_at_5
value: 51.080999999999996
- type: precision_at_1
value: 35.458
- type: precision_at_10
value: 9.056000000000001
- type: precision_at_100
value: 1.137
- type: precision_at_1000
value: 0.12
- type: precision_at_3
value: 21.582
- type: precision_at_5
value: 15.295
- type: recall_at_1
value: 31.387999999999998
- type: recall_at_10
value: 75.661
- type: recall_at_100
value: 93.605
- type: recall_at_1000
value: 98.658
- type: recall_at_3
value: 55.492
- type: recall_at_5
value: 64.85600000000001
- task:
type: Retrieval
dataset:
type: quora
name: MTEB QuoraRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 70.547
- type: map_at_10
value: 84.495
- type: map_at_100
value: 85.14
- type: map_at_1000
value: 85.15599999999999
- type: map_at_3
value: 81.606
- type: map_at_5
value: 83.449
- type: mrr_at_1
value: 81.22
- type: mrr_at_10
value: 87.31
- type: mrr_at_100
value: 87.436
- type: mrr_at_1000
value: 87.437
- type: mrr_at_3
value: 86.363
- type: mrr_at_5
value: 87.06
- type: ndcg_at_1
value: 81.24
- type: ndcg_at_10
value: 88.145
- type: ndcg_at_100
value: 89.423
- type: ndcg_at_1000
value: 89.52799999999999
- type: ndcg_at_3
value: 85.435
- type: ndcg_at_5
value: 87
- type: precision_at_1
value: 81.24
- type: precision_at_10
value: 13.381000000000002
- type: precision_at_100
value: 1.529
- type: precision_at_1000
value: 0.157
- type: precision_at_3
value: 37.44
- type: precision_at_5
value: 24.62
- type: recall_at_1
value: 70.547
- type: recall_at_10
value: 95.083
- type: recall_at_100
value: 99.50099999999999
- type: recall_at_1000
value: 99.982
- type: recall_at_3
value: 87.235
- type: recall_at_5
value: 91.701
- task:
type: Clustering
dataset:
type: mteb/reddit-clustering
name: MTEB RedditClustering
config: default
split: test
revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
metrics:
- type: v_measure
value: 57.93101384071724
- task:
type: Clustering
dataset:
type: mteb/reddit-clustering-p2p
name: MTEB RedditClusteringP2P
config: default
split: test
revision: 282350215ef01743dc01b456c7f5241fa8937f16
metrics:
- type: v_measure
value: 62.46951126228829
- task:
type: Retrieval
dataset:
type: scidocs
name: MTEB SCIDOCS
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 5.018000000000001
- type: map_at_10
value: 13.818
- type: map_at_100
value: 16.346
- type: map_at_1000
value: 16.744999999999997
- type: map_at_3
value: 9.456000000000001
- type: map_at_5
value: 11.879000000000001
- type: mrr_at_1
value: 24.8
- type: mrr_at_10
value: 37.092000000000006
- type: mrr_at_100
value: 38.199
- type: mrr_at_1000
value: 38.243
- type: mrr_at_3
value: 33.517
- type: mrr_at_5
value: 35.692
- type: ndcg_at_1
value: 24.8
- type: ndcg_at_10
value: 22.782
- type: ndcg_at_100
value: 32.072
- type: ndcg_at_1000
value: 38.163000000000004
- type: ndcg_at_3
value: 21.046
- type: ndcg_at_5
value: 19.134
- type: precision_at_1
value: 24.8
- type: precision_at_10
value: 12
- type: precision_at_100
value: 2.5420000000000003
- type: precision_at_1000
value: 0.39899999999999997
- type: precision_at_3
value: 20
- type: precision_at_5
value: 17.4
- type: recall_at_1
value: 5.018000000000001
- type: recall_at_10
value: 24.34
- type: recall_at_100
value: 51.613
- type: recall_at_1000
value: 80.95
- type: recall_at_3
value: 12.153
- type: recall_at_5
value: 17.648
- task:
type: STS
dataset:
type: mteb/sickr-sts
name: MTEB SICK-R
config: default
split: test
revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
metrics:
- type: cos_sim_pearson
value: 86.28259142800503
- type: cos_sim_spearman
value: 82.04792579356291
- type: euclidean_pearson
value: 83.7755858026306
- type: euclidean_spearman
value: 82.04789872846196
- type: manhattan_pearson
value: 83.79937122515567
- type: manhattan_spearman
value: 82.05076966288574
- task:
type: STS
dataset:
type: mteb/sts12-sts
name: MTEB STS12
config: default
split: test
revision: a0d554a64d88156834ff5ae9920b964011b16384
metrics:
- type: cos_sim_pearson
value: 87.37773414195387
- type: cos_sim_spearman
value: 78.76929696642694
- type: euclidean_pearson
value: 85.75861298616339
- type: euclidean_spearman
value: 78.76607739031363
- type: manhattan_pearson
value: 85.74412868736295
- type: manhattan_spearman
value: 78.74388526796852
- task:
type: STS
dataset:
type: mteb/sts13-sts
name: MTEB STS13
config: default
split: test
revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
metrics:
- type: cos_sim_pearson
value: 89.6176449076649
- type: cos_sim_spearman
value: 90.39810997063387
- type: euclidean_pearson
value: 89.753863994154
- type: euclidean_spearman
value: 90.39810989027997
- type: manhattan_pearson
value: 89.67750819879801
- type: manhattan_spearman
value: 90.3286558059104
- task:
type: STS
dataset:
type: mteb/sts14-sts
name: MTEB STS14
config: default
split: test
revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
metrics:
- type: cos_sim_pearson
value: 87.7488246203373
- type: cos_sim_spearman
value: 85.44794976383963
- type: euclidean_pearson
value: 87.33205836313964
- type: euclidean_spearman
value: 85.44793954377185
- type: manhattan_pearson
value: 87.30760291906203
- type: manhattan_spearman
value: 85.4308413187653
- task:
type: STS
dataset:
type: mteb/sts15-sts
name: MTEB STS15
config: default
split: test
revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
metrics:
- type: cos_sim_pearson
value: 88.6937750952719
- type: cos_sim_spearman
value: 90.01162604967037
- type: euclidean_pearson
value: 89.35321306629116
- type: euclidean_spearman
value: 90.01161406477627
- type: manhattan_pearson
value: 89.31351907042307
- type: manhattan_spearman
value: 89.97264644642166
- task:
type: STS
dataset:
type: mteb/sts16-sts
name: MTEB STS16
config: default
split: test
revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
metrics:
- type: cos_sim_pearson
value: 85.49107564294891
- type: cos_sim_spearman
value: 87.42092493144571
- type: euclidean_pearson
value: 86.88112016705634
- type: euclidean_spearman
value: 87.42092430260175
- type: manhattan_pearson
value: 86.85846210123235
- type: manhattan_spearman
value: 87.40059575522972
- task:
type: STS
dataset:
type: mteb/sts17-crosslingual-sts
name: MTEB STS17 (en-en)
config: en-en
split: test
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
metrics:
- type: cos_sim_pearson
value: 88.71766466521638
- type: cos_sim_spearman
value: 88.80244555668372
- type: euclidean_pearson
value: 89.59428700746064
- type: euclidean_spearman
value: 88.80244555668372
- type: manhattan_pearson
value: 89.62272396580352
- type: manhattan_spearman
value: 88.77584531534937
- task:
type: STS
dataset:
type: mteb/sts22-crosslingual-sts
name: MTEB STS22 (en)
config: en
split: test
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
metrics:
- type: cos_sim_pearson
value: 67.7743776239708
- type: cos_sim_spearman
value: 68.79768249749681
- type: euclidean_pearson
value: 70.16430919697441
- type: euclidean_spearman
value: 68.79768249749681
- type: manhattan_pearson
value: 70.17205038967042
- type: manhattan_spearman
value: 68.89740094589914
- task:
type: STS
dataset:
type: mteb/stsbenchmark-sts
name: MTEB STSBenchmark
config: default
split: test
revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
metrics:
- type: cos_sim_pearson
value: 86.9087137484716
- type: cos_sim_spearman
value: 89.19783009521629
- type: euclidean_pearson
value: 88.89888500166009
- type: euclidean_spearman
value: 89.19783009521629
- type: manhattan_pearson
value: 88.88400033783687
- type: manhattan_spearman
value: 89.16299162200889
- task:
type: Reranking
dataset:
type: mteb/scidocs-reranking
name: MTEB SciDocsRR
config: default
split: test
revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
metrics:
- type: map
value: 86.9799916253683
- type: mrr
value: 96.0708200659181
- task:
type: Retrieval
dataset:
type: scifact
name: MTEB SciFact
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 59.928000000000004
- type: map_at_10
value: 69.56400000000001
- type: map_at_100
value: 70.125
- type: map_at_1000
value: 70.148
- type: map_at_3
value: 66.774
- type: map_at_5
value: 68.267
- type: mrr_at_1
value: 62.666999999999994
- type: mrr_at_10
value: 70.448
- type: mrr_at_100
value: 70.94
- type: mrr_at_1000
value: 70.962
- type: mrr_at_3
value: 68.389
- type: mrr_at_5
value: 69.65599999999999
- type: ndcg_at_1
value: 62.666999999999994
- type: ndcg_at_10
value: 74.117
- type: ndcg_at_100
value: 76.248
- type: ndcg_at_1000
value: 76.768
- type: ndcg_at_3
value: 69.358
- type: ndcg_at_5
value: 71.574
- type: precision_at_1
value: 62.666999999999994
- type: precision_at_10
value: 9.933
- type: precision_at_100
value: 1.09
- type: precision_at_1000
value: 0.11299999999999999
- type: precision_at_3
value: 27.222
- type: precision_at_5
value: 17.867
- type: recall_at_1
value: 59.928000000000004
- type: recall_at_10
value: 87.156
- type: recall_at_100
value: 96.167
- type: recall_at_1000
value: 100
- type: recall_at_3
value: 74.117
- type: recall_at_5
value: 79.80000000000001
- task:
type: PairClassification
dataset:
type: mteb/sprintduplicatequestions-pairclassification
name: MTEB SprintDuplicateQuestions
config: default
split: test
revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
metrics:
- type: cos_sim_accuracy
value: 99.83762376237624
- type: cos_sim_ap
value: 96.05077689253707
- type: cos_sim_f1
value: 91.75879396984925
- type: cos_sim_precision
value: 92.22222222222223
- type: cos_sim_recall
value: 91.3
- type: dot_accuracy
value: 99.83762376237624
- type: dot_ap
value: 96.05082513542375
- type: dot_f1
value: 91.75879396984925
- type: dot_precision
value: 92.22222222222223
- type: dot_recall
value: 91.3
- type: euclidean_accuracy
value: 99.83762376237624
- type: euclidean_ap
value: 96.05077689253707
- type: euclidean_f1
value: 91.75879396984925
- type: euclidean_precision
value: 92.22222222222223
- type: euclidean_recall
value: 91.3
- type: manhattan_accuracy
value: 99.83861386138614
- type: manhattan_ap
value: 96.07646831090695
- type: manhattan_f1
value: 91.86220668996505
- type: manhattan_precision
value: 91.72482552342971
- type: manhattan_recall
value: 92
- type: max_accuracy
value: 99.83861386138614
- type: max_ap
value: 96.07646831090695
- type: max_f1
value: 91.86220668996505
- task:
type: Clustering
dataset:
type: mteb/stackexchange-clustering
name: MTEB StackExchangeClustering
config: default
split: test
revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
metrics:
- type: v_measure
value: 66.40672513062134
- task:
type: Clustering
dataset:
type: mteb/stackexchange-clustering-p2p
name: MTEB StackExchangeClusteringP2P
config: default
split: test
revision: 815ca46b2622cec33ccafc3735d572c266efdb44
metrics:
- type: v_measure
value: 35.31519237029376
- task:
type: Reranking
dataset:
type: mteb/stackoverflowdupquestions-reranking
name: MTEB StackOverflowDupQuestions
config: default
split: test
revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
metrics:
- type: map
value: 53.15764586446943
- type: mrr
value: 53.981596426449364
- task:
type: Summarization
dataset:
type: mteb/summeval
name: MTEB SummEval
config: default
split: test
revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
metrics:
- type: cos_sim_pearson
value: 30.92935724124931
- type: cos_sim_spearman
value: 31.54589922149803
- type: dot_pearson
value: 30.929365687857675
- type: dot_spearman
value: 31.54589922149803
- task:
type: Retrieval
dataset:
type: trec-covid
name: MTEB TRECCOVID
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 0.22100000000000003
- type: map_at_10
value: 1.791
- type: map_at_100
value: 9.404
- type: map_at_1000
value: 22.932
- type: map_at_3
value: 0.601
- type: map_at_5
value: 1.001
- type: mrr_at_1
value: 76
- type: mrr_at_10
value: 85.667
- type: mrr_at_100
value: 85.667
- type: mrr_at_1000
value: 85.667
- type: mrr_at_3
value: 84.667
- type: mrr_at_5
value: 85.667
- type: ndcg_at_1
value: 72
- type: ndcg_at_10
value: 68.637
- type: ndcg_at_100
value: 51.418
- type: ndcg_at_1000
value: 47.75
- type: ndcg_at_3
value: 70.765
- type: ndcg_at_5
value: 71.808
- type: precision_at_1
value: 76
- type: precision_at_10
value: 73.8
- type: precision_at_100
value: 52.68000000000001
- type: precision_at_1000
value: 20.9
- type: precision_at_3
value: 74.667
- type: precision_at_5
value: 78
- type: recall_at_1
value: 0.22100000000000003
- type: recall_at_10
value: 2.027
- type: recall_at_100
value: 12.831000000000001
- type: recall_at_1000
value: 44.996
- type: recall_at_3
value: 0.635
- type: recall_at_5
value: 1.097
- task:
type: Retrieval
dataset:
type: webis-touche2020
name: MTEB Touche2020
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 2.289
- type: map_at_10
value: 10.475
- type: map_at_100
value: 16.993
- type: map_at_1000
value: 18.598
- type: map_at_3
value: 5.891
- type: map_at_5
value: 7.678999999999999
- type: mrr_at_1
value: 32.653
- type: mrr_at_10
value: 49.475
- type: mrr_at_100
value: 50.483
- type: mrr_at_1000
value: 50.499
- type: mrr_at_3
value: 45.918
- type: mrr_at_5
value: 48.469
- type: ndcg_at_1
value: 29.592000000000002
- type: ndcg_at_10
value: 25.891
- type: ndcg_at_100
value: 38.106
- type: ndcg_at_1000
value: 49.873
- type: ndcg_at_3
value: 29.915999999999997
- type: ndcg_at_5
value: 27.982000000000003
- type: precision_at_1
value: 32.653
- type: precision_at_10
value: 22.448999999999998
- type: precision_at_100
value: 7.837
- type: precision_at_1000
value: 1.5730000000000002
- type: precision_at_3
value: 31.293
- type: precision_at_5
value: 27.755000000000003
- type: recall_at_1
value: 2.289
- type: recall_at_10
value: 16.594
- type: recall_at_100
value: 48.619
- type: recall_at_1000
value: 85.467
- type: recall_at_3
value: 7.144
- type: recall_at_5
value: 10.465
- task:
type: Classification
dataset:
type: mteb/toxic_conversations_50k
name: MTEB ToxicConversationsClassification
config: default
split: test
revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
metrics:
- type: accuracy
value: 71.5268
- type: ap
value: 14.763212211567907
- type: f1
value: 55.200562727472736
- task:
type: Classification
dataset:
type: mteb/tweet_sentiment_extraction
name: MTEB TweetSentimentExtractionClassification
config: default
split: test
revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
metrics:
- type: accuracy
value: 59.25297113752123
- type: f1
value: 59.55315247947331
- task:
type: Clustering
dataset:
type: mteb/twentynewsgroups-clustering
name: MTEB TwentyNewsgroupsClustering
config: default
split: test
revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
metrics:
- type: v_measure
value: 51.47685515092062
- task:
type: PairClassification
dataset:
type: mteb/twittersemeval2015-pairclassification
name: MTEB TwitterSemEval2015
config: default
split: test
revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
metrics:
- type: cos_sim_accuracy
value: 86.73183525064076
- type: cos_sim_ap
value: 76.08498196190112
- type: cos_sim_f1
value: 69.4834471209584
- type: cos_sim_precision
value: 67.88321167883211
- type: cos_sim_recall
value: 71.16094986807387
- type: dot_accuracy
value: 86.73183525064076
- type: dot_ap
value: 76.08503499590553
- type: dot_f1
value: 69.4834471209584
- type: dot_precision
value: 67.88321167883211
- type: dot_recall
value: 71.16094986807387
- type: euclidean_accuracy
value: 86.73183525064076
- type: euclidean_ap
value: 76.08500172594562
- type: euclidean_f1
value: 69.4834471209584
- type: euclidean_precision
value: 67.88321167883211
- type: euclidean_recall
value: 71.16094986807387
- type: manhattan_accuracy
value: 86.6960720033379
- type: manhattan_ap
value: 76.00885156192993
- type: manhattan_f1
value: 69.24488725747247
- type: manhattan_precision
value: 68.8118811881188
- type: manhattan_recall
value: 69.68337730870712
- type: max_accuracy
value: 86.73183525064076
- type: max_ap
value: 76.08503499590553
- type: max_f1
value: 69.4834471209584
- task:
type: PairClassification
dataset:
type: mteb/twitterurlcorpus-pairclassification
name: MTEB TwitterURLCorpus
config: default
split: test
revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
metrics:
- type: cos_sim_accuracy
value: 88.74529436876625
- type: cos_sim_ap
value: 85.53503158777171
- type: cos_sim_f1
value: 77.68167368965773
- type: cos_sim_precision
value: 74.70496232048912
- type: cos_sim_recall
value: 80.9054511857099
- type: dot_accuracy
value: 88.74529436876625
- type: dot_ap
value: 85.5350158446314
- type: dot_f1
value: 77.68167368965773
- type: dot_precision
value: 74.70496232048912
- type: dot_recall
value: 80.9054511857099
- type: euclidean_accuracy
value: 88.74529436876625
- type: euclidean_ap
value: 85.53503846009764
- type: euclidean_f1
value: 77.68167368965773
- type: euclidean_precision
value: 74.70496232048912
- type: euclidean_recall
value: 80.9054511857099
- type: manhattan_accuracy
value: 88.73753250281368
- type: manhattan_ap
value: 85.53197689629393
- type: manhattan_f1
value: 77.58753437213566
- type: manhattan_precision
value: 74.06033456988871
- type: manhattan_recall
value: 81.46750846935633
- type: max_accuracy
value: 88.74529436876625
- type: max_ap
value: 85.53503846009764
- type: max_f1
value: 77.68167368965773
license: apache-2.0
language:
- en
library_name: sentence-transformers
---
The crispy sentence embedding family from Mixedbread.
# 🪆mxbai-embed-2d-large-v1🪆 This is our [2DMSE](https://arxiv.org/abs/2402.14776) sentence embedding model. It supports the adaptive transformer layer and embedding size. Find out more in our [blog post](https://mixedbread.ai/blog/mxbai-embed-2d-large-v1). TLDR: 2D-🪆 allows you to shrink the model and the embeddings layer. Shrinking only the embeddings model yields results competitive with other models like [Nomic's embedding model](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5). Shrinking the model to ~50% maintains up to 85% of the performance without further training. ## Quickstart Here, we provide several ways to produce sentence embeddings with adaptive layers and embedding sizes. **For this version, it is recommended to set adaptive layers from 20 to 24.** ### sentence-transformers Currently, the best way to use our models is with the most recent version of sentence-transformers. ```bash python -m pip install -U sentence-transformers ``` ```python from sentence_transformers import models, SentenceTransformer from sentence_transformers.util import cos_sim # 1. load model with `cls` pooling model = SentenceTransformer("mixedbread-ai/mxbai-embed-2d-large-v1") # 2. set adaptive layer and embedding size. # it is recommended to set layers from 20 to 24. new_num_layers = 22 # 1D: set layer size model[0].auto_model.encoder.layer = model[0].auto_model.encoder.layer[:new_num_layers] new_embedding_size = 768 # 2D: set embedding size # 3. encode embeddings = model.encode( [ 'Who is german and likes bread?', 'Everybody in Germany.' ] ) # Similarity of the first sentence with the other two similarities = cos_sim(embeddings[0, :new_embedding_size], embeddings[1, :new_embedding_size]) print('similarities:', similarities) ``` ### angle-emb You can also use the latest `angle-emb` for inference, as follows: ```bash python -m pip install -U angle-emb ``` ```python from angle_emb import AnglE from sentence_transformers.util import cos_sim # 1. 
load model model = AnglE.from_pretrained("mixedbread-ai/mxbai-embed-2d-large-v1", pooling_strategy='cls').cuda() # 2. set adaptive layer and embedding size. # it is recommended to set layers from 20 to 24. layer_index = 22 # 1d: layer embedding_size = 768 # 2d: embedding size # 3. encode embeddings = model.encode([ 'Who is german and likes bread?', 'Everybody in Germany.' ], layer_index=layer_index, embedding_size=embedding_size) similarities = cos_sim(embeddings[0], embeddings[1:]) print('similarities:', similarities) ``` ### Transformers.js If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using: ```bash npm i @xenova/transformers ``` You can then use the model to compute embeddings as follows: ```js import { pipeline, cos_sim } from '@xenova/transformers'; // Create a feature-extraction pipeline const extractor = await pipeline('feature-extraction', 'mixedbread-ai/mxbai-embed-2d-large-v1', { quantized: false, // (Optional) remove this line to use the 8-bit quantized model }); // Compute sentence embeddings (with `cls` pooling) const sentences = ['Who is german and likes bread?', 'Everybody in Germany.' ]; const output = await extractor(sentences, { pooling: 'cls' }); // Set embedding size and truncate embeddings const new_embedding_size = 768; const truncated = output.slice(null, [0, new_embedding_size]); // Compute cosine similarity console.log(cos_sim(truncated[0].data, truncated[1].data)); // 0.6979532021425204 ``` ### Using API You can use the model via our API as follows: ```python from mixedbread_ai.client import MixedbreadAI from sklearn.metrics.pairwise import cosine_similarity import os mxbai = MixedbreadAI(api_key="{MIXEDBREAD_API_KEY}") english_sentences = [ 'What is the capital of Australia?', 'Canberra is the capital of Australia.' 
] res = mxbai.embeddings( input=english_sentences, model="mixedbread-ai/mxbai-embed-2d-large-v1", dimensions=512, ) embeddings = [entry.embedding for entry in res.data] similarities = cosine_similarity([embeddings[0]], [embeddings[1]]) print(similarities) ``` The API comes with native INT8 and binary quantization support! Check out the [docs](https://mixedbread.ai/docs) for more information. ## Evaluation Please find more information in our [blog post](https://mixedbread.ai/blog/mxbai-embed-2d-large-v1). ## Community Please join our [Discord Community](https://discord.gg/jDfMHzAVfU) and share your feedback and thoughts! We are here to help and are always happy to chat. ## License Apache 2.0