---
language: []
library_name: sentence-transformers
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:557850
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
base_model: Alibaba-NLP/gte-base-en-v1.5
datasets: []
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
widget:
  - source_sentence: >-
      Mwanamume aliyepangwa vizuri anasimama kwa mguu mmoja karibu na pwani safi
      ya bahari.
    sentences:
      - mtu anacheka wakati wa kufua nguo
      - Mwanamume fulani yuko nje karibu na ufuo wa bahari.
      - Mwanamume fulani ameketi kwenye sofa yake.
  - source_sentence: >-
      Mwanamume mwenye ngozi nyeusi akivuta sigareti karibu na chombo cha taka
      cha kijani.
    sentences:
      - Karibu na chombo cha taka mwanamume huyo alisimama na kuvuta sigareti
      - Kitanda ni chafu.
      - >-
        Alipokuwa kwenye dimbwi la kuogelea mvulana huyo mwenye ugonjwa wa
        albino alijihadhari na jua kupita kiasi
  - source_sentence: >-
      Mwanamume kijana mwenye nywele nyekundu anaketi ukutani akisoma gazeti
      huku mwanamke na msichana mchanga wakipita.
    sentences:
      - >-
        Mwanamume aliyevalia shati la bluu amegonga ukuta kando ya barabara na
        gari la bluu na gari nyekundu lenye maji nyuma.
      - >-
        Mwanamume mchanga anatazama gazeti huku wanawake wawili wakipita karibu
        naye.
      - >-
        Mwanamume huyo mchanga analala huku Mama akimwongoza binti yake kwenye
        bustani.
  - source_sentence: Wasichana wako nje.
    sentences:
      - Wasichana wawili wakisafiri kwenye sehemu ya kusisimua.
      - >-
        Kuna watu watatu wakiongoza gari linaloweza kugeuzwa-geuzwa wakipita
        watu wengine.
      - >-
        Wasichana watatu wamesimama pamoja katika chumba, mmoja anasikiliza,
        mwingine anaandika ukutani na wa tatu anaongea nao.
  - source_sentence: >-
      Mwanamume aliyevalia koti la bluu la kuzuia upepo, amelala uso chini
      kwenye benchi ya bustani, akiwa na chupa ya pombe iliyofungwa kwenye
      mojawapo ya miguu ya benchi.
    sentences:
      - Mwanamume amelala uso chini kwenye benchi ya bustani.
      - Mwanamke anaunganisha uzi katika mipira kando ya rundo la mipira
      - Mwanamume fulani anacheza dansi kwenye klabu hiyo akifungua chupa.
pipeline_tag: sentence-similarity
model-index:
  - name: SentenceTransformer based on Alibaba-NLP/gte-base-en-v1.5
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 768
          type: sts-test-768
        metrics:
          - type: pearson_cosine
            value: 0.7043347377864616
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.6964343322647693
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.6909108013214409
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6918757829517036
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.6929234868177542
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.6937500609344119
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.70124411699517
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6918131755587139
            name: Spearman Dot
          - type: pearson_max
            value: 0.7043347377864616
            name: Pearson Max
          - type: spearman_max
            value: 0.6964343322647693
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 512
          type: sts-test-512
        metrics:
          - type: pearson_cosine
            value: 0.7024370656682521
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.6960997397306026
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.6937121372484026
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6942680507505805
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.6958879339072266
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.6965067811247516
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.6739585793600888
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6635969331239819
            name: Spearman Dot
          - type: pearson_max
            value: 0.7024370656682521
            name: Pearson Max
          - type: spearman_max
            value: 0.6965067811247516
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 256
          type: sts-test-256
        metrics:
          - type: pearson_cosine
            value: 0.6975572102129655
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.6922084123611896
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7012769244476563
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7002000478097333
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7033203116396916
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7027884000644871
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.6353839704898405
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6242173680909447
            name: Spearman Dot
          - type: pearson_max
            value: 0.7033203116396916
            name: Pearson Max
          - type: spearman_max
            value: 0.7027884000644871
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 128
          type: sts-test-128
        metrics:
          - type: pearson_cosine
            value: 0.6909605436368886
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.6880114885304113
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7044693468919807
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7001174190718876
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7063530897910422
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7028721535481625
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5846530941942547
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5728728042034709
            name: Spearman Dot
          - type: pearson_max
            value: 0.7063530897910422
            name: Pearson Max
          - type: spearman_max
            value: 0.7028721535481625
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 64
          type: sts-test-64
        metrics:
          - type: pearson_cosine
            value: 0.680996097859508
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.6803001320954455
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7053262249895214
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6987184531053297
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7061173611755747
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7003828247494553
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5177214664781289
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5019887605325859
            name: Spearman Dot
          - type: pearson_max
            value: 0.7061173611755747
            name: Pearson Max
          - type: spearman_max
            value: 0.7003828247494553
            name: Spearman Max
---

SentenceTransformer based on Alibaba-NLP/gte-base-en-v1.5

This is a sentence-transformers model finetuned from Alibaba-NLP/gte-base-en-v1.5. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: Alibaba-NLP/gte-base-en-v1.5
  • Maximum Sequence Length: 8192 tokens
  • Output Dimensionality: 768 dimensions
  • Similarity Function: Cosine Similarity

Model Sources

  • Documentation: Sentence Transformers documentation (https://www.sbert.net)
  • Repository: https://github.com/UKPLab/sentence-transformers
  • Hugging Face: https://huggingface.co/models?library=sentence-transformers

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
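
The pooling layer takes the CLS token embedding (pooling_mode_cls_token: True). For reference, a minimal sketch of equivalent inference with the transformers library alone; it assumes the repository loads through AutoModel with trust_remote_code=True, which the custom NewModel architecture requires:

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

model_id = "sartifyllc/swahili-gte-base-en-v1.5-nli-matryoshka"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id, trust_remote_code=True)

inputs = tokenizer(
    ["Wasichana wako nje."],
    padding=True,
    truncation=True,
    return_tensors="pt",
)
with torch.no_grad():
    outputs = model(**inputs)

# CLS pooling, mirroring pooling_mode_cls_token=True in the module above
embeddings = outputs.last_hidden_state[:, 0]
# Normalize so that dot products equal cosine similarities
embeddings = F.normalize(embeddings, p=2, dim=1)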

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sartifyllc/swahili-gte-base-en-v1.5-nli-matryoshka")
# Run inference
sentences = [
    'Mwanamume aliyevalia koti la bluu la kuzuia upepo, amelala uso chini kwenye benchi ya bustani, akiwa na chupa ya pombe iliyofungwa kwenye mojawapo ya miguu ya benchi.',
    'Mwanamume amelala uso chini kwenye benchi ya bustani.',
    'Mwanamume fulani anacheza dansi kwenye klabu hiyo akifungua chupa.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# (3, 768)

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
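
Because the model was trained with MatryoshkaLoss, its embeddings can be truncated to the smaller dimensions evaluated below (512, 256, 128, or 64) with only a modest drop in quality. A minimal sketch using the truncate_dim argument (available in Sentence Transformers since v2.7; this card lists v3.0.1):

from sentence_transformers import SentenceTransformer

# Load with embeddings truncated to 256 dimensions, one of the
# Matryoshka dimensions this model was evaluated at
model = SentenceTransformer(
    "sartifyllc/swahili-gte-base-en-v1.5-nli-matryoshka",
    truncate_dim=256,
)
embeddings = model.encode([
    "Wasichana wako nje.",
    "Wasichana wawili wakisafiri kwenye sehemu ya kusisimua.",
])
print(embeddings.shape)
# (2, 256)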

Evaluation

Metrics

Semantic Similarity (sts-test-768)

Metric Value
pearson_cosine 0.7043
spearman_cosine 0.6964
pearson_manhattan 0.6909
spearman_manhattan 0.6919
pearson_euclidean 0.6929
spearman_euclidean 0.6938
pearson_dot 0.7012
spearman_dot 0.6918
pearson_max 0.7043
spearman_max 0.6964

Semantic Similarity (sts-test-512)

Metric Value
pearson_cosine 0.7024
spearman_cosine 0.6961
pearson_manhattan 0.6937
spearman_manhattan 0.6943
pearson_euclidean 0.6959
spearman_euclidean 0.6965
pearson_dot 0.6740
spearman_dot 0.6636
pearson_max 0.7024
spearman_max 0.6965

Semantic Similarity (sts-test-256)

Metric Value
pearson_cosine 0.6976
spearman_cosine 0.6922
pearson_manhattan 0.7013
spearman_manhattan 0.7002
pearson_euclidean 0.7033
spearman_euclidean 0.7028
pearson_dot 0.6354
spearman_dot 0.6242
pearson_max 0.7033
spearman_max 0.7028

Semantic Similarity (sts-test-128)

Metric Value
pearson_cosine 0.6910
spearman_cosine 0.6880
pearson_manhattan 0.7045
spearman_manhattan 0.7001
pearson_euclidean 0.7064
spearman_euclidean 0.7029
pearson_dot 0.5847
spearman_dot 0.5729
pearson_max 0.7064
spearman_max 0.7029

Semantic Similarity (sts-test-64)

Metric Value
pearson_cosine 0.6810
spearman_cosine 0.6803
pearson_manhattan 0.7053
spearman_manhattan 0.6987
pearson_euclidean 0.7061
spearman_euclidean 0.7004
pearson_dot 0.5177
spearman_dot 0.5020
pearson_max 0.7061
spearman_max 0.7004
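
The metric names match Sentence Transformers' EmbeddingSimilarityEvaluator, run once per Matryoshka dimension (the training logs below name these runs sts-test-768 through sts-test-64). A sketch of how such an evaluation can be reproduced; the sentence pairs and gold scores here are illustrative stand-ins, not the actual test data:

from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

model = SentenceTransformer("sartifyllc/swahili-gte-base-en-v1.5-nli-matryoshka")

# Illustrative pairs with gold similarity scores in [0, 1]
sentences1 = ["Wasichana wako nje.", "Kitanda ni chafu."]
sentences2 = [
    "Wasichana wawili wakisafiri kwenye sehemu ya kusisimua.",
    "Mwanamume fulani ameketi kwenye sofa yake.",
]
gold_scores = [0.8, 0.1]

evaluator = EmbeddingSimilarityEvaluator(
    sentences1, sentences2, gold_scores, name="sts-test-768"
)
print(evaluator(model))  # Pearson/Spearman scores per similarity function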

Training Details

Training Hyperparameters

Non-Default Hyperparameters

  • num_train_epochs: 1
  • warmup_ratio: 0.1
  • fp16: True
  • batch_sampler: no_duplicates
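
Together with the loss tags above, these settings correspond to a MultipleNegativesRankingLoss wrapped in a MatryoshkaLoss and trained for a single epoch. A minimal sketch of that setup with the Sentence Transformers v3 trainer; the one-row dataset is a hypothetical stand-in for the 557,850 training examples, whose source the card does not name:

from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MatryoshkaLoss, MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers

model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)

# Stand-in triplets; the real run used 557,850 examples
train_dataset = Dataset.from_dict({
    "anchor": ["Wasichana wako nje."],
    "positive": ["Wasichana wawili wakisafiri kwenye sehemu ya kusisimua."],
    "negative": ["Kitanda ni chafu."],
})

# MultipleNegativesRankingLoss wrapped in MatryoshkaLoss, matching the
# loss tags; the dimensions mirror those evaluated above
loss = MatryoshkaLoss(
    model,
    MultipleNegativesRankingLoss(model),
    matryoshka_dims=[768, 512, 256, 128, 64],
)

args = SentenceTransformerTrainingArguments(
    output_dir="swahili-gte-base-en-v1.5-nli-matryoshka",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    warmup_ratio=0.1,
    fp16=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # avoids in-batch false negatives
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()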

All Hyperparameters

  • overwrite_output_dir: False
  • do_predict: False
  • prediction_loss_only: True
  • per_device_train_batch_size: 8
  • per_device_eval_batch_size: 8
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 1
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss sts-test-128_spearman_cosine sts-test-256_spearman_cosine sts-test-512_spearman_cosine sts-test-64_spearman_cosine sts-test-768_spearman_cosine
0.0029 100 13.2716 - - - - -
0.0057 200 9.83 - - - - -
0.0086 300 9.9047 - - - - -
0.0115 400 7.5137 - - - - -
0.0143 500 7.6419 - - - - -
0.0172 600 6.9603 - - - - -
0.0201 700 7.3009 - - - - -
0.0229 800 7.1397 - - - - -
0.0258 900 8.1352 - - - - -
0.0287 1000 7.5945 - - - - -
0.0315 1100 7.0476 - - - - -
0.0344 1200 5.3356 - - - - -
0.0373 1300 5.1529 - - - - -
0.0402 1400 4.9726 - - - - -
0.0430 1500 5.1683 - - - - -
0.0459 1600 4.7945 - - - - -
0.0488 1700 4.9624 - - - - -
0.0516 1800 4.4254 - - - - -
0.0545 1900 4.4379 - - - - -
0.0574 2000 4.0327 - - - - -
0.0602 2100 3.5138 - - - - -
0.0631 2200 4.5055 - - - - -
0.0660 2300 3.8966 - - - - -
0.0688 2400 4.4884 - - - - -
0.0717 2500 3.5825 - - - - -
0.0746 2600 4.0155 - - - - -
0.0774 2700 4.9842 - - - - -
0.0803 2800 4.7732 - - - - -
0.0832 2900 4.5095 - - - - -
0.0860 3000 4.2526 - - - - -
0.0889 3100 4.033 - - - - -
0.0918 3200 4.0052 - - - - -
0.0946 3300 3.197 - - - - -
0.0975 3400 3.3423 - - - - -
0.1004 3500 2.9528 - - - - -
0.1033 3600 3.9315 - - - - -
0.1061 3700 3.7733 - - - - -
0.1090 3800 3.5153 - - - - -
0.1119 3900 4.1326 - - - - -
0.1147 4000 5.2179 - - - - -
0.1176 4100 6.4314 - - - - -
0.1205 4200 6.3485 - - - - -
0.1233 4300 4.7771 - - - - -
0.1262 4400 4.9055 - - - - -
0.1291 4500 3.9025 - - - - -
0.1319 4600 4.4638 - - - - -
0.1348 4700 5.0049 - - - - -
0.1377 4800 4.3124 - - - - -
0.1405 4900 4.0027 - - - - -
0.1434 5000 4.3173 - - - - -
0.1463 5100 3.6629 - - - - -
0.1491 5200 4.2759 - - - - -
0.1520 5300 3.4621 - - - - -
0.1549 5400 3.9251 - - - - -
0.1577 5500 4.2294 - - - - -
0.1606 5600 3.6244 - - - - -
0.1635 5700 4.283 - - - - -
0.1664 5800 4.4665 - - - - -
0.1692 5900 4.956 - - - - -
0.1721 6000 4.795 - - - - -
0.1750 6100 4.998 - - - - -
0.1778 6200 5.3316 - - - - -
0.1807 6300 5.2247 - - - - -
0.1836 6400 4.6554 - - - - -
0.1864 6500 5.2474 - - - - -
0.1893 6600 5.1168 - - - - -
0.1922 6700 5.1372 - - - - -
0.1950 6800 4.1564 - - - - -
0.1979 6900 4.6997 - - - - -
0.2008 7000 4.1854 - - - - -
0.2036 7100 4.4574 - - - - -
0.2065 7200 4.1859 - - - - -
0.2094 7300 4.8306 - - - - -
0.2122 7400 4.4487 - - - - -
0.2151 7500 4.4606 - - - - -
0.2180 7600 4.4222 - - - - -
0.2208 7700 4.7836 - - - - -
0.2237 7800 4.1475 - - - - -
0.2266 7900 5.1679 - - - - -
0.2294 8000 5.0106 - - - - -
0.2323 8100 4.1899 - - - - -
0.2352 8200 4.9873 - - - - -
0.2381 8300 4.3656 - - - - -
0.2409 8400 4.6117 - - - - -
0.2438 8500 4.1785 - - - - -
0.2467 8600 3.7809 - - - - -
0.2495 8700 4.9116 - - - - -
0.2524 8800 4.553 - - - - -
0.2553 8900 4.3178 - - - - -
0.2581 9000 5.6111 - - - - -
0.2610 9100 5.4219 - - - - -
0.2639 9200 5.5628 - - - - -
0.2667 9300 4.4221 - - - - -
0.2696 9400 4.7988 - - - - -
0.2725 9500 4.9361 - - - - -
0.2753 9600 4.7225 - - - - -
0.2782 9700 4.7258 - - - - -
0.2811 9800 4.7071 - - - - -
0.2839 9900 4.5519 - - - - -
0.2868 10000 4.5354 - - - - -
0.2897 10100 4.3893 - - - - -
0.2925 10200 4.7848 - - - - -
0.2954 10300 4.7195 - - - - -
0.2983 10400 4.0155 - - - - -
0.3012 10500 5.1602 - - - - -
0.3040 10600 4.6345 - - - - -
0.3069 10700 5.39 - - - - -
0.3098 10800 4.7974 - - - - -
0.3126 10900 4.9736 - - - - -
0.3155 11000 5.0949 - - - - -
0.3184 11100 4.6704 - - - - -
0.3212 11200 4.7001 - - - - -
0.3241 11300 4.2913 - - - - -
0.3270 11400 4.7536 - - - - -
0.3298 11500 4.8349 - - - - -
0.3327 11600 4.2567 - - - - -
0.3356 11700 4.6754 - - - - -
0.3384 11800 4.8534 - - - - -
0.3413 11900 4.7486 - - - - -
0.3442 12000 4.9194 - - - - -
0.3470 12100 4.4572 - - - - -
0.3499 12200 4.6173 - - - - -
0.3528 12300 5.1292 - - - - -
0.3556 12400 4.6138 - - - - -
0.3585 12500 4.6884 - - - - -
0.3614 12600 4.4245 - - - - -
0.3643 12700 4.7534 - - - - -
0.3671 12800 4.7027 - - - - -
0.3700 12900 4.5186 - - - - -
0.3729 13000 3.8917 - - - - -
0.3757 13100 4.507 - - - - -
0.3786 13200 5.4866 - - - - -
0.3815 13300 4.0424 - - - - -
0.3843 13400 4.4017 - - - - -
0.3872 13500 4.0016 - - - - -
0.3901 13600 4.0695 - - - - -
0.3929 13700 4.4957 - - - - -
0.3958 13800 4.4655 - - - - -
0.3987 13900 4.5717 - - - - -
0.4015 14000 4.134 - - - - -
0.4044 14100 4.2704 - - - - -
0.4073 14200 4.7712 - - - - -
0.4101 14300 4.3946 - - - - -
0.4130 14400 4.5848 - - - - -
0.4159 14500 4.4655 - - - - -
0.4187 14600 4.278 - - - - -
0.4216 14700 4.2877 - - - - -
0.4245 14800 3.9299 - - - - -
0.4274 14900 4.7078 - - - - -
0.4302 15000 4.8527 - - - - -
0.4331 15100 4.3476 - - - - -
0.4360 15200 4.2012 - - - - -
0.4388 15300 4.1766 - - - - -
0.4417 15400 3.9842 - - - - -
0.4446 15500 4.1244 - - - - -
0.4474 15600 4.7983 - - - - -
0.4503 15700 4.2341 - - - - -
0.4532 15800 4.9829 - - - - -
0.4560 15900 4.0221 - - - - -
0.4589 16000 4.1082 - - - - -
0.4618 16100 3.8922 - - - - -
0.4646 16200 4.5382 - - - - -
0.4675 16300 4.4428 - - - - -
0.4704 16400 3.9087 - - - - -
0.4732 16500 3.7465 - - - - -
0.4761 16600 4.149 - - - - -
0.4790 16700 4.5691 - - - - -
0.4818 16800 3.8776 - - - - -
0.4847 16900 3.7354 - - - - -
0.4876 17000 4.25 - - - - -
0.4904 17100 4.4119 - - - - -
0.4933 17200 4.2319 - - - - -
0.4962 17300 4.3736 - - - - -
0.4991 17400 4.5345 - - - - -
0.5019 17500 4.1824 - - - - -
0.5048 17600 4.0033 - - - - -
0.5077 17700 4.277 - - - - -
0.5105 17800 4.3553 - - - - -
0.5134 17900 3.9528 - - - - -
0.5163 18000 4.068 - - - - -
0.5191 18100 4.0464 - - - - -
0.5220 18200 4.1665 - - - - -
0.5249 18300 3.7445 - - - - -
0.5277 18400 4.2248 - - - - -
0.5306 18500 3.9295 - - - - -
0.5335 18600 3.546 - - - - -
0.5363 18700 3.7463 - - - - -
0.5392 18800 3.9798 - - - - -
0.5421 18900 4.4773 - - - - -
0.5449 19000 4.3534 - - - - -
0.5478 19100 4.2347 - - - - -
0.5507 19200 3.8113 - - - - -
0.5535 19300 4.4689 - - - - -
0.5564 19400 4.2188 - - - - -
0.5593 19500 4.1266 - - - - -
0.5622 19600 3.9222 - - - - -
0.5650 19700 4.38 - - - - -
0.5679 19800 4.4557 - - - - -
0.5708 19900 4.7566 - - - - -
0.5736 20000 3.8922 - - - - -
0.5765 20100 4.0263 - - - - -
0.5794 20200 3.9258 - - - - -
0.5822 20300 4.3767 - - - - -
0.5851 20400 4.1211 - - - - -
0.5880 20500 4.3083 - - - - -
0.5908 20600 4.4544 - - - - -
0.5937 20700 4.0118 - - - - -
0.5966 20800 3.9136 - - - - -
0.5994 20900 3.8614 - - - - -
0.6023 21000 3.8057 - - - - -
0.6052 21100 4.4934 - - - - -
0.6080 21200 3.9206 - - - - -
0.6109 21300 4.43 - - - - -
0.6138 21400 4.0576 - - - - -
0.6166 21500 3.9019 - - - - -
0.6195 21600 4.4216 - - - - -
0.6224 21700 4.0959 - - - - -
0.6253 21800 3.8756 - - - - -
0.6281 21900 4.7791 - - - - -
0.6310 22000 3.6284 - - - - -
0.6339 22100 4.5534 - - - - -
0.6367 22200 4.18 - - - - -
0.6396 22300 4.3002 - - - - -
0.6425 22400 3.7162 - - - - -
0.6453 22500 4.8495 - - - - -
0.6482 22600 4.2966 - - - - -
0.6511 22700 3.7718 - - - - -
0.6539 22800 4.2257 - - - - -
0.6568 22900 3.9821 - - - - -
0.6597 23000 4.0853 - - - - -
0.6625 23100 3.6124 - - - - -
0.6654 23200 3.732 - - - - -
0.6683 23300 4.3821 - - - - -
0.6711 23400 4.229 - - - - -
0.6740 23500 4.2589 - - - - -
0.6769 23600 4.4975 - - - - -
0.6797 23700 3.8062 - - - - -
0.6826 23800 3.6924 - - - - -
0.6855 23900 3.7736 - - - - -
0.6883 24000 3.7815 - - - - -
0.6912 24100 4.1192 - - - - -
0.6941 24200 4.2336 - - - - -
0.6970 24300 4.1145 - - - - -
0.6998 24400 4.0681 - - - - -
0.7027 24500 4.0492 - - - - -
0.7056 24600 3.7831 - - - - -
0.7084 24700 4.2445 - - - - -
0.7113 24800 3.9308 - - - - -
0.7142 24900 3.8705 - - - - -
0.7170 25000 3.6998 - - - - -
0.7199 25100 3.4736 - - - - -
0.7228 25200 3.9971 - - - - -
0.7256 25300 3.8292 - - - - -
0.7285 25400 3.8499 - - - - -
0.7314 25500 3.8732 - - - - -
0.7342 25600 3.9409 - - - - -
0.7371 25700 4.4416 - - - - -
0.7400 25800 3.663 - - - - -
0.7428 25900 3.9786 - - - - -
0.7457 26000 4.1781 - - - - -
0.7486 26100 3.692 - - - - -
0.7514 26200 3.2601 - - - - -
0.7543 26300 7.1759 - - - - -
0.7572 26400 7.0459 - - - - -
0.7601 26500 6.1797 - - - - -
0.7629 26600 6.2055 - - - - -
0.7658 26700 6.1403 - - - - -
0.7687 26800 5.703 - - - - -
0.7715 26900 6.1283 - - - - -
0.7744 27000 5.71 - - - - -
0.7773 27100 5.3105 - - - - -
0.7801 27200 5.4202 - - - - -
0.7830 27300 5.2964 - - - - -
0.7859 27400 5.4852 - - - - -
0.7887 27500 5.241 - - - - -
0.7916 27600 5.4322 - - - - -
0.7945 27700 5.6285 - - - - -
0.7973 27800 5.0215 - - - - -
0.8002 27900 5.2433 - - - - -
0.8031 28000 4.9617 - - - - -
0.8059 28100 4.9479 - - - - -
0.8088 28200 4.9077 - - - - -
0.8117 28300 4.853 - - - - -
0.8145 28400 4.6727 - - - - -
0.8174 28500 4.9987 - - - - -
0.8203 28600 4.8405 - - - - -
0.8232 28700 4.9627 - - - - -
0.8260 28800 4.5608 - - - - -
0.8289 28900 5.0802 - - - - -
0.8318 29000 4.9069 - - - - -
0.8346 29100 4.8605 - - - - -
0.8375 29200 4.6424 - - - - -
0.8404 29300 4.7813 - - - - -
0.8432 29400 4.5925 - - - - -
0.8461 29500 4.7081 - - - - -
0.8490 29600 4.4319 - - - - -
0.8518 29700 4.7291 - - - - -
0.8547 29800 4.749 - - - - -
0.8576 29900 4.6148 - - - - -
0.8604 30000 4.2549 - - - - -
0.8633 30100 4.3415 - - - - -
0.8662 30200 4.1999 - - - - -
0.8690 30300 4.4298 - - - - -
0.8719 30400 4.3612 - - - - -
0.8748 30500 4.4834 - - - - -
0.8776 30600 4.4774 - - - - -
0.8805 30700 4.2524 - - - - -
0.8834 30800 4.5562 - - - - -
0.8863 30900 4.5261 - - - - -
0.8891 31000 4.0262 - - - - -
0.8920 31100 4.1109 - - - - -
0.8949 31200 4.1955 - - - - -
0.8977 31300 4.3169 - - - - -
0.9006 31400 4.5862 - - - - -
0.9035 31500 4.5503 - - - - -
0.9063 31600 4.2587 - - - - -
0.9092 31700 4.0028 - - - - -
0.9121 31800 4.3575 - - - - -
0.9149 31900 4.1033 - - - - -
0.9178 32000 4.2877 - - - - -
0.9207 32100 3.9537 - - - - -
0.9235 32200 4.107 - - - - -
0.9264 32300 4.3288 - - - - -
0.9293 32400 4.102 - - - - -
0.9321 32500 4.1751 - - - - -
0.9350 32600 3.7919 - - - - -
0.9379 32700 4.0939 - - - - -
0.9407 32800 4.1822 - - - - -
0.9436 32900 3.959 - - - - -
0.9465 33000 3.9173 - - - - -
0.9493 33100 4.3087 - - - - -
0.9522 33200 4.1239 - - - - -
0.9551 33300 4.1012 - - - - -
0.9580 33400 3.9988 - - - - -
0.9608 33500 4.1478 - - - - -
0.9637 33600 4.1669 - - - - -
0.9666 33700 4.0398 - - - - -
0.9694 33800 3.9814 - - - - -
0.9723 33900 4.3764 - - - - -
0.9752 34000 4.2847 - - - - -
0.9780 34100 3.9461 - - - - -
0.9809 34200 4.3377 - - - - -
0.9838 34300 3.8114 - - - - -
0.9866 34400 4.0827 - - - - -
0.9895 34500 4.0014 - - - - -
0.9924 34600 4.3964 - - - - -
0.9952 34700 3.9103 - - - - -
0.9981 34800 4.0363 - - - - -
1.0 34866 - 0.6880 0.6922 0.6961 0.6803 0.6964

Framework Versions

  • Python: 3.11.9
  • Sentence Transformers: 3.0.1
  • Transformers: 4.40.1
  • PyTorch: 2.3.0+cu121
  • Accelerate: 0.29.3
  • Datasets: 2.19.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}