diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,40 +1,1263 @@
-from functools import reduce
+from functools import partial, reduce
+import json
+import os
import re
+from datasets import load_dataset
import gradio as gr
+from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub.repocard import metadata_load
import pandas as pd
+from tqdm.autonotebook import tqdm
-from envs import REPO_ID
-from refresh import BOARDS_CONFIG, TASKS, TASKS_CONFIG, TASK_DESCRIPTIONS, PRETTY_NAMES, load_results, make_clickable_model
-from refresh import PROPRIETARY_MODELS, SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS, CROSS_ENCODERS, BI_ENCODERS, INSTRUCT_MODELS, NOINSTRUCT_MODELS, EXTERNAL_MODEL_TO_LINK
+from utils.model_size import get_model_parameters_memory
+TASKS = [
+ "BitextMining",
+ "Classification",
+ "Clustering",
+ "PairClassification",
+ "Reranking",
+ "Retrieval",
+ "STS",
+ "Summarization",
+]
+
+TASK_LIST_BITEXT_MINING = ['BUCC (de-en)', 'BUCC (fr-en)', 'BUCC (ru-en)', 'BUCC (zh-en)', 'Tatoeba (afr-eng)', 'Tatoeba (amh-eng)', 'Tatoeba (ang-eng)', 'Tatoeba (ara-eng)', 'Tatoeba (arq-eng)', 'Tatoeba (arz-eng)', 'Tatoeba (ast-eng)', 'Tatoeba (awa-eng)', 'Tatoeba (aze-eng)', 'Tatoeba (bel-eng)', 'Tatoeba (ben-eng)', 'Tatoeba (ber-eng)', 'Tatoeba (bos-eng)', 'Tatoeba (bre-eng)', 'Tatoeba (bul-eng)', 'Tatoeba (cat-eng)', 'Tatoeba (cbk-eng)', 'Tatoeba (ceb-eng)', 'Tatoeba (ces-eng)', 'Tatoeba (cha-eng)', 'Tatoeba (cmn-eng)', 'Tatoeba (cor-eng)', 'Tatoeba (csb-eng)', 'Tatoeba (cym-eng)', 'Tatoeba (dan-eng)', 'Tatoeba (deu-eng)', 'Tatoeba (dsb-eng)', 'Tatoeba (dtp-eng)', 'Tatoeba (ell-eng)', 'Tatoeba (epo-eng)', 'Tatoeba (est-eng)', 'Tatoeba (eus-eng)', 'Tatoeba (fao-eng)', 'Tatoeba (fin-eng)', 'Tatoeba (fra-eng)', 'Tatoeba (fry-eng)', 'Tatoeba (gla-eng)', 'Tatoeba (gle-eng)', 'Tatoeba (glg-eng)', 'Tatoeba (gsw-eng)', 'Tatoeba (heb-eng)', 'Tatoeba (hin-eng)', 'Tatoeba (hrv-eng)', 'Tatoeba (hsb-eng)', 'Tatoeba (hun-eng)', 'Tatoeba (hye-eng)', 'Tatoeba (ido-eng)', 'Tatoeba (ile-eng)', 'Tatoeba (ina-eng)', 'Tatoeba (ind-eng)', 'Tatoeba (isl-eng)', 'Tatoeba (ita-eng)', 'Tatoeba (jav-eng)', 'Tatoeba (jpn-eng)', 'Tatoeba (kab-eng)', 'Tatoeba (kat-eng)', 'Tatoeba (kaz-eng)', 'Tatoeba (khm-eng)', 'Tatoeba (kor-eng)', 'Tatoeba (kur-eng)', 'Tatoeba (kzj-eng)', 'Tatoeba (lat-eng)', 'Tatoeba (lfn-eng)', 'Tatoeba (lit-eng)', 'Tatoeba (lvs-eng)', 'Tatoeba (mal-eng)', 'Tatoeba (mar-eng)', 'Tatoeba (max-eng)', 'Tatoeba (mhr-eng)', 'Tatoeba (mkd-eng)', 'Tatoeba (mon-eng)', 'Tatoeba (nds-eng)', 'Tatoeba (nld-eng)', 'Tatoeba (nno-eng)', 'Tatoeba (nob-eng)', 'Tatoeba (nov-eng)', 'Tatoeba (oci-eng)', 'Tatoeba (orv-eng)', 'Tatoeba (pam-eng)', 'Tatoeba (pes-eng)', 'Tatoeba (pms-eng)', 'Tatoeba (pol-eng)', 'Tatoeba (por-eng)', 'Tatoeba (ron-eng)', 'Tatoeba (rus-eng)', 'Tatoeba (slk-eng)', 'Tatoeba (slv-eng)', 'Tatoeba (spa-eng)', 'Tatoeba (sqi-eng)', 'Tatoeba (srp-eng)', 'Tatoeba (swe-eng)', 'Tatoeba (swg-eng)', 'Tatoeba (swh-eng)', 'Tatoeba (tam-eng)', 'Tatoeba (tat-eng)', 'Tatoeba (tel-eng)', 'Tatoeba (tgl-eng)', 'Tatoeba (tha-eng)', 'Tatoeba (tuk-eng)', 'Tatoeba (tur-eng)', 'Tatoeba (tzl-eng)', 'Tatoeba (uig-eng)', 'Tatoeba (ukr-eng)', 'Tatoeba (urd-eng)', 'Tatoeba (uzb-eng)', 'Tatoeba (vie-eng)', 'Tatoeba (war-eng)', 'Tatoeba (wuu-eng)', 'Tatoeba (xho-eng)', 'Tatoeba (yid-eng)', 'Tatoeba (yue-eng)', 'Tatoeba (zsm-eng)']
+TASK_LIST_BITEXT_MINING_DA = ["BornholmBitextMining"]
+
+TASK_LIST_CLASSIFICATION = [
+ "AmazonCounterfactualClassification (en)",
+ "AmazonPolarityClassification",
+ "AmazonReviewsClassification (en)",
+ "Banking77Classification",
+ "EmotionClassification",
+ "ImdbClassification",
+ "MassiveIntentClassification (en)",
+ "MassiveScenarioClassification (en)",
+ "MTOPDomainClassification (en)",
+ "MTOPIntentClassification (en)",
+ "ToxicConversationsClassification",
+ "TweetSentimentExtractionClassification",
+]
+
+TASK_LIST_CLASSIFICATION_DA = [
+ "AngryTweetsClassification",
+ "DanishPoliticalCommentsClassification",
+ "DKHateClassification",
+ "LccSentimentClassification",
+ "MassiveIntentClassification (da)",
+ "MassiveScenarioClassification (da)",
+ "NordicLangClassification",
+ "ScalaDaClassification",
+]
+
+TASK_LIST_CLASSIFICATION_FR = [
+ "AmazonReviewsClassification (fr)",
+ "MasakhaNEWSClassification (fra)",
+ "MassiveIntentClassification (fr)",
+ "MassiveScenarioClassification (fr)",
+ "MTOPDomainClassification (fr)",
+ "MTOPIntentClassification (fr)",
+]
+
+TASK_LIST_CLASSIFICATION_NB = [
+ "NoRecClassification",
+ "NordicLangClassification",
+ "NorwegianParliament",
+ "MassiveIntentClassification (nb)",
+ "MassiveScenarioClassification (nb)",
+ "ScalaNbClassification",
+]
+
+TASK_LIST_CLASSIFICATION_PL = [
+ "AllegroReviews",
+ "CBD",
+ "MassiveIntentClassification (pl)",
+ "MassiveScenarioClassification (pl)",
+ "PAC",
+ "PolEmo2.0-IN",
+ "PolEmo2.0-OUT",
+]
+
+TASK_LIST_CLASSIFICATION_SV = [
+ "DalajClassification",
+ "MassiveIntentClassification (sv)",
+ "MassiveScenarioClassification (sv)",
+ "NordicLangClassification",
+ "ScalaSvClassification",
+ "SweRecClassification",
+]
+
+TASK_LIST_CLASSIFICATION_ZH = [
+ "AmazonReviewsClassification (zh)",
+ "IFlyTek",
+ "JDReview",
+ "MassiveIntentClassification (zh-CN)",
+ "MassiveScenarioClassification (zh-CN)",
+ "MultilingualSentiment",
+ "OnlineShopping",
+ "TNews",
+ "Waimai",
+]
+
+TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)']
+
+TASK_LIST_CLUSTERING = [
+ "ArxivClusteringP2P",
+ "ArxivClusteringS2S",
+ "BiorxivClusteringP2P",
+ "BiorxivClusteringS2S",
+ "MedrxivClusteringP2P",
+ "MedrxivClusteringS2S",
+ "RedditClustering",
+ "RedditClusteringP2P",
+ "StackExchangeClustering",
+ "StackExchangeClusteringP2P",
+ "TwentyNewsgroupsClustering",
+]
+
+
+TASK_LIST_CLUSTERING_DE = [
+ "BlurbsClusteringP2P",
+ "BlurbsClusteringS2S",
+ "TenKGnadClusteringP2P",
+ "TenKGnadClusteringS2S",
+]
+
+TASK_LIST_CLUSTERING_FR = [
+ "AlloProfClusteringP2P",
+ "AlloProfClusteringS2S",
+ "HALClusteringS2S",
+ "MLSUMClusteringP2P",
+ "MLSUMClusteringS2S",
+ "MasakhaNEWSClusteringP2P (fra)",
+ "MasakhaNEWSClusteringS2S (fra)",
+]
+
+TASK_LIST_CLUSTERING_PL = [
+ "8TagsClustering",
+]
+
+TASK_LIST_CLUSTERING_ZH = [
+ "CLSClusteringP2P",
+ "CLSClusteringS2S",
+ "ThuNewsClusteringP2P",
+ "ThuNewsClusteringS2S",
+]
+
+TASK_LIST_PAIR_CLASSIFICATION = [
+ "SprintDuplicateQuestions",
+ "TwitterSemEval2015",
+ "TwitterURLCorpus",
+]
+
+TASK_LIST_PAIR_CLASSIFICATION_FR = [
+ "OpusparcusPC (fr)",
+ "PawsX (fr)",
+]
+
+TASK_LIST_PAIR_CLASSIFICATION_PL = [
+ "CDSC-E",
+ "PPC",
+ "PSC",
+ "SICK-E-PL",
+]
+
+TASK_LIST_PAIR_CLASSIFICATION_ZH = [
+ "Cmnli",
+ "Ocnli",
+]
+
+TASK_LIST_RERANKING = [
+ "AskUbuntuDupQuestions",
+ "MindSmallReranking",
+ "SciDocsRR",
+ "StackOverflowDupQuestions",
+]
+
+TASK_LIST_RERANKING_FR = [
+ "AlloprofReranking",
+ "SyntecReranking",
+]
+
+TASK_LIST_RERANKING_ZH = [
+ "CMedQAv1",
+ "CMedQAv2",
+ "MMarcoReranking",
+ "T2Reranking",
+]
+
+TASK_LIST_RETRIEVAL = [
+ "ArguAna",
+ "ClimateFEVER",
+ "CQADupstackRetrieval",
+ "DBPedia",
+ "FEVER",
+ "FiQA2018",
+ "HotpotQA",
+ "MSMARCO",
+ "NFCorpus",
+ "NQ",
+ "QuoraRetrieval",
+ "SCIDOCS",
+ "SciFact",
+ "Touche2020",
+ "TRECCOVID",
+]
+
+TASK_LIST_RETRIEVAL_FR = [
+ "AlloprofRetrieval",
+ "BSARDRetrieval",
+ "MintakaRetrieval (fr)",
+# "MultiLongDocRetrieval",
+ "SyntecRetrieval",
+ "XPQARetrieval (fr)",
+]
+
+TASK_LIST_RETRIEVAL_LAW = [
+ "AILACasedocs",
+ "AILAStatutes",
+ "GerDaLIRSmall",
+ "LeCaRDv2",
+ "LegalBenchConsumerContractsQA",
+ "LegalBenchCorporateLobbying",
+ "LegalQuAD",
+ "LegalSummarization",
+]
+
+TASK_LIST_RETRIEVAL_PL = [
+ "ArguAna-PL",
+ "DBPedia-PL",
+ "FiQA-PL",
+ "HotpotQA-PL",
+ "MSMARCO-PL",
+ "NFCorpus-PL",
+ "NQ-PL",
+ "Quora-PL",
+ "SCIDOCS-PL",
+ "SciFact-PL",
+ "TRECCOVID-PL",
+]
+
+TASK_LIST_RETRIEVAL_ZH = [
+ "CmedqaRetrieval",
+ "CovidRetrieval",
+ "DuRetrieval",
+ "EcomRetrieval",
+ "MedicalRetrieval",
+ "MMarcoRetrieval",
+ "T2Retrieval",
+ "VideoRetrieval",
+]
+
+TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
+ "CQADupstackAndroidRetrieval",
+ "CQADupstackEnglishRetrieval",
+ "CQADupstackGamingRetrieval",
+ "CQADupstackGisRetrieval",
+ "CQADupstackMathematicaRetrieval",
+ "CQADupstackPhysicsRetrieval",
+ "CQADupstackProgrammersRetrieval",
+ "CQADupstackStatsRetrieval",
+ "CQADupstackTexRetrieval",
+ "CQADupstackUnixRetrieval",
+ "CQADupstackWebmastersRetrieval",
+ "CQADupstackWordpressRetrieval"
+]
+
+TASK_LIST_STS = [
+ "BIOSSES",
+ "SICK-R",
+ "STS12",
+ "STS13",
+ "STS14",
+ "STS15",
+ "STS16",
+ "STS17 (en-en)",
+ "STS22 (en)",
+ "STSBenchmark",
+]
+
+TASK_LIST_STS_FR = [
+ "STS22 (fr)",
+ "STSBenchmarkMultilingualSTS (fr)",
+ "SICKFr",
+]
+
+TASK_LIST_STS_PL = [
+ "CDSC-R",
+ "SICK-R-PL",
+ "STS22 (pl)",
+]
+
+TASK_LIST_STS_ZH = [
+ "AFQMC",
+ "ATEC",
+ "BQ",
+ "LCQMC",
+ "PAWSX",
+ "QBQTC",
+ "STS22 (zh)",
+ "STSB",
+]
+
+TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
+
+TASK_LIST_SUMMARIZATION = ["SummEval",]
+
+TASK_LIST_SUMMARIZATION_FR = ["SummEvalFr"]
+
+TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
+TASK_LIST_FR = TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR
+TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
+TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
+
+TASK_TO_METRIC = {
+ "BitextMining": "f1",
+ "Clustering": "v_measure",
+ "Classification": "accuracy",
+ "PairClassification": "cos_sim_ap",
+ "Reranking": "map",
+ "Retrieval": "ndcg_at_10",
+ "STS": "cos_sim_spearman",
+ "Summarization": "cos_sim_spearman",
+}
+
+def make_clickable_model(model_name, link=None):
+    # Render a model name as an HTML anchor; defaults to its Hugging Face page.
+    if link is None:
+        link = "https://huggingface.co/" + model_name
+    # Remove user from model name
+    return (
+        f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>'
+    )
+
+# Models without metadata, thus we cannot fetch their results naturally
+EXTERNAL_MODELS = [
+ "Baichuan-text-embedding",
+ "Cohere-embed-english-v3.0",
+ "Cohere-embed-multilingual-v3.0",
+ "Cohere-embed-multilingual-light-v3.0",
+ "DanskBERT",
+ "LASER2",
+ "LaBSE",
+ "OpenSearch-text-hybrid",
+ "all-MiniLM-L12-v2",
+ "all-MiniLM-L6-v2",
+ "all-mpnet-base-v2",
+ "allenai-specter",
+ "bert-base-10lang-cased",
+ "bert-base-15lang-cased",
+ "bert-base-25lang-cased",
+ "bert-base-multilingual-cased",
+ "bert-base-multilingual-uncased",
+ "bert-base-swedish-cased",
+ "bert-base-uncased",
+ "bge-base-zh-v1.5",
+ "bge-large-en-v1.5",
+ "bge-large-zh-v1.5",
+ "bge-large-zh-noinstruct",
+ "bge-small-zh-v1.5",
+ "contriever-base-msmarco",
+ "cross-en-de-roberta-sentence-transformer",
+ "dfm-encoder-large-v1",
+ "dfm-sentence-encoder-large-1",
+ "distiluse-base-multilingual-cased-v2",
+ "e5-base",
+ "e5-large",
+ "e5-mistral-7b-instruct",
+ "e5-small",
+ "electra-small-nordic",
+ "electra-small-swedish-cased-discriminator",
+ "flaubert_base_cased",
+ "flaubert_base_uncased",
+ "flaubert_large_cased",
+ "gbert-base",
+ "gbert-large",
+ "gelectra-base",
+ "gelectra-large",
+ "glove.6B.300d",
+ "google-gecko.text-embedding-preview-0409",
+ "google-gecko-256.text-embedding-preview-0409",
+ "gottbert-base",
+ "gtr-t5-base",
+ "gtr-t5-large",
+ "gtr-t5-xl",
+ "gtr-t5-xxl",
+ "herbert-base-retrieval-v2",
+ "komninos",
+ "luotuo-bert-medium",
+ "m3e-base",
+ "m3e-large",
+ "mistral-embed",
+ "msmarco-bert-co-condensor",
+ "multi-qa-MiniLM-L6-cos-v1",
+ "multilingual-e5-base",
+ "multilingual-e5-large",
+ "multilingual-e5-small",
+ "nb-bert-base",
+ "nb-bert-large",
+ "nomic-embed-text-v1.5-64",
+ "nomic-embed-text-v1.5-128",
+ "nomic-embed-text-v1.5-256",
+ "nomic-embed-text-v1.5-512",
+ "norbert3-base",
+ "norbert3-large",
+ "paraphrase-multilingual-MiniLM-L12-v2",
+ "paraphrase-multilingual-mpnet-base-v2",
+ "sentence-bert-swedish-cased",
+ "sentence-camembert-base",
+ "sentence-camembert-large",
+ "sentence-croissant-llm-base",
+ "sentence-t5-base",
+ "sentence-t5-large",
+ "sentence-t5-xl",
+ "sentence-t5-xxl",
+ "silver-retriever-base-v1",
+ "sup-simcse-bert-base-uncased",
+ "st-polish-paraphrase-from-distilroberta",
+ "st-polish-paraphrase-from-mpnet",
+ "text2vec-base-chinese",
+ "text2vec-base-multilingual",
+ "text2vec-large-chinese",
+ "text-embedding-3-small",
+ "text-embedding-3-large",
+ "text-embedding-3-large-256",
+ "text-embedding-ada-002",
+ "text-similarity-ada-001",
+ "text-similarity-babbage-001",
+ "text-similarity-curie-001",
+ "text-similarity-davinci-001",
+ "text-search-ada-doc-001",
+ "text-search-ada-001",
+ "text-search-babbage-001",
+ "text-search-curie-001",
+ "text-search-davinci-001",
+ "titan-embed-text-v1",
+ "udever-bloom-1b1",
+ "udever-bloom-560m",
+ "universal-sentence-encoder-multilingual-3",
+ "universal-sentence-encoder-multilingual-large-3",
+ "unsup-simcse-bert-base-uncased",
+ "use-cmlm-multilingual",
+ "voyage-2",
+ "voyage-code-2",
+ "voyage-law-2",
+ "voyage-lite-01-instruct",
+ "voyage-lite-02-instruct",
+ "xlm-roberta-base",
+ "xlm-roberta-large",
+]
+
+EXTERNAL_MODEL_TO_LINK = {
+ "Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
+ "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
+ "Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
+ "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
+ "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
+ "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
+ "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
+ "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
+ "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
+ "bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
+ "bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
+ "bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
+ "bert-base-multilingual-cased": "https://huggingface.co/google-bert/bert-base-multilingual-cased",
+ "bert-base-multilingual-uncased": "https://huggingface.co/google-bert/bert-base-multilingual-uncased",
+ "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
+ "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
+ "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
+ "bge-large-en-v1.5": "https://huggingface.co/BAAI/bge-large-en-v1.5",
+ "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
+ "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
+ "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
+ "camembert-base": "https://huggingface.co/almanach/camembert-base",
+ "camembert-large": "https://huggingface.co/almanach/camembert-large",
+ "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
+ "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
+ "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
+ "distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
+ "distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
+ "distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
+ "distilbert-base-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-fr-cased",
+ "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased",
+ "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2",
+ "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
+ "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
+ "e5-base": "https://huggingface.co/intfloat/e5-base",
+ "e5-large": "https://huggingface.co/intfloat/e5-large",
+ "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
+ "e5-small": "https://huggingface.co/intfloat/e5-small",
+ "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
+ "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
+ "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
+ "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
+ "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
+ "gbert-base": "https://huggingface.co/deepset/gbert-base",
+ "gbert-large": "https://huggingface.co/deepset/gbert-large",
+ "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
+ "gelectra-large": "https://huggingface.co/deepset/gelectra-large",
+ "glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d",
+ "google-gecko.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models",
+ "google-gecko-256.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models",
+ "gottbert-base": "https://huggingface.co/uklfr/gottbert-base",
+ "gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base",
+ "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
+ "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
+ "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
+ "herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
+ "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
+ "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
+ "LASER2": "https://github.com/facebookresearch/LASER",
+ "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
+ "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
+ "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
+ "mistral-embed": "https://docs.mistral.ai/guides/embeddings",
+ "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
+ "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+ "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
+ "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large",
+ "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
+ "nb-bert-base": "https://huggingface.co/NbAiLab/nb-bert-base",
+ "nb-bert-large": "https://huggingface.co/NbAiLab/nb-bert-large",
+ "nomic-embed-text-v1.5-64": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
+ "nomic-embed-text-v1.5-128": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
+ "nomic-embed-text-v1.5-256": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
+ "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
+ "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
+ "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
+ "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
+ "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
+ "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+ "sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
+ "sentence-camembert-large": "https://huggingface.co/dangvantuan/sentence-camembert-large",
+ "sentence-croissant-llm-base": "https://huggingface.co/Wissam42/sentence-croissant-llm-base",
+ "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased",
+ "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base",
+ "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large",
+ "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
+ "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
+ "silver-retriever-base-v1": "https://huggingface.co/ipipan/silver-retriever-base-v1",
+ "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
+ "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
+ "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
+ "text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
+ "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
+ "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
+ "text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates",
+ "text-embedding-3-large-256": "https://openai.com/blog/new-embedding-models-and-api-updates",
+ "text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model",
+ "text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-similarity-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-similarity-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-search-ada-doc-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-search-ada-query-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-search-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-search-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
+ "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
+ "udever-bloom-1b1": "https://huggingface.co/izhx/udever-bloom-1b1",
+ "udever-bloom-560m": "https://huggingface.co/izhx/udever-bloom-560m",
+ "universal-sentence-encoder-multilingual-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3",
+ "universal-sentence-encoder-multilingual-large-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3",
+ "unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
+ "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
+ "voyage-2": "https://docs.voyageai.com/embeddings/",
+ "voyage-code-2": "https://docs.voyageai.com/embeddings/",
+ "voyage-law-2": "https://docs.voyageai.com/embeddings/",
+ "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
+ "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
+ "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
+ "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large",
+}
+
+EXTERNAL_MODEL_TO_DIM = {
+ "Cohere-embed-english-v3.0": 1024,
+ "Cohere-embed-multilingual-v3.0": 1024,
+ "Cohere-embed-multilingual-light-v3.0": 384,
+ "all-MiniLM-L12-v2": 384,
+ "all-MiniLM-L6-v2": 384,
+ "all-mpnet-base-v2": 768,
+ "allenai-specter": 768,
+ "Baichuan-text-embedding": 1024,
+ "bert-base-10lang-cased": 768,
+ "bert-base-15lang-cased": 768,
+ "bert-base-25lang-cased": 768,
+ "bert-base-multilingual-cased": 768,
+ "bert-base-multilingual-uncased": 768,
+ "bert-base-swedish-cased": 768,
+ "bert-base-uncased": 768,
+ "bge-base-zh-v1.5": 768,
+ "bge-large-en-v1.5": 1024,
+ "bge-large-zh-v1.5": 1024,
+ "bge-large-zh-noinstruct": 1024,
+ "bge-small-zh-v1.5": 512,
+ "camembert-base": 512,
+ "camembert-large": 768,
+ "contriever-base-msmarco": 768,
+ "cross-en-de-roberta-sentence-transformer": 768,
+ "DanskBERT": 768,
+ "distilbert-base-25lang-cased": 768,
+ "distilbert-base-en-fr-cased": 768,
+ "distilbert-base-en-fr-es-pt-it-cased": 768,
+ "distilbert-base-fr-cased": 768,
+ "distilbert-base-uncased": 768,
+ "distiluse-base-multilingual-cased-v2": 512,
+ "dfm-encoder-large-v1": 1024,
+ "dfm-sentence-encoder-large-1": 1024,
+ "e5-base": 768,
+ "e5-large": 1024,
+ "e5-mistral-7b-instruct": 4096,
+ "e5-small": 384,
+ "electra-small-nordic": 256,
+ "electra-small-swedish-cased-discriminator": 256,
+ "flaubert_base_cased": 768,
+ "flaubert_base_uncased": 768,
+ "flaubert_large_cased": 1024,
+ "luotuo-bert-medium": 768,
+ "LASER2": 1024,
+ "LaBSE": 768,
+ "gbert-base": 768,
+ "gbert-large": 1024,
+ "gelectra-base": 768,
+ "gelectra-large": 1024,
+ "glove.6B.300d": 300,
+ "google-gecko.text-embedding-preview-0409": 768,
+ "google-gecko-256.text-embedding-preview-0409": 256,
+ "gottbert-base": 768,
+ "gtr-t5-base": 768,
+ "gtr-t5-large": 768,
+ "gtr-t5-xl": 768,
+ "gtr-t5-xxl": 768,
+ "herbert-base-retrieval-v2": 768,
+ "komninos": 300,
+ "m3e-base": 768,
+ "m3e-large": 768,
+ "mistral-embed": 1024,
+ "msmarco-bert-co-condensor": 768,
+ "multi-qa-MiniLM-L6-cos-v1": 384,
+ "multilingual-e5-base": 768,
+ "multilingual-e5-small": 384,
+ "multilingual-e5-large": 1024,
+ "nb-bert-base": 768,
+ "nb-bert-large": 1024,
+ "nomic-embed-text-v1.5-64": 64,
+ "nomic-embed-text-v1.5-128": 128,
+ "nomic-embed-text-v1.5-256": 256,
+ "nomic-embed-text-v1.5-512": 512,
+ "norbert3-base": 768,
+ "norbert3-large": 1024,
+ "OpenSearch-text-hybrid": 1792,
+ "paraphrase-multilingual-MiniLM-L12-v2": 384,
+ "paraphrase-multilingual-mpnet-base-v2": 768,
+ "sentence-camembert-base": 768,
+ "sentence-camembert-large": 1024,
+ "sentence-croissant-llm-base": 2048,
+ "sentence-bert-swedish-cased": 768,
+ "sentence-t5-base": 768,
+ "sentence-t5-large": 768,
+ "sentence-t5-xl": 768,
+ "sentence-t5-xxl": 768,
+ "silver-retriever-base-v1": 768,
+ "sup-simcse-bert-base-uncased": 768,
+ "st-polish-paraphrase-from-distilroberta": 768,
+ "st-polish-paraphrase-from-mpnet": 768,
+ "text2vec-base-chinese": 768,
+ "text2vec-large-chinese": 1024,
+ "text-embedding-3-large": 3072,
+ "text-embedding-3-large-256": 256,
+ "text-embedding-3-small": 1536,
+ "text-embedding-ada-002": 1536,
+ "text-similarity-ada-001": 1024,
+ "text-similarity-babbage-001": 2048,
+ "text-similarity-curie-001": 4096,
+ "text-similarity-davinci-001": 12288,
+ "text-search-ada-doc-001": 1024,
+ "text-search-ada-query-001": 1024,
+ "text-search-ada-001": 1024,
+ "text-search-babbage-001": 2048,
+ "text-search-curie-001": 4096,
+ "text-search-davinci-001": 12288,
+ "titan-embed-text-v1": 1536,
+ "udever-bloom-1b1": 1536,
+ "udever-bloom-560m": 1024,
+ "universal-sentence-encoder-multilingual-3": 512,
+ "universal-sentence-encoder-multilingual-large-3": 512,
+ "unsup-simcse-bert-base-uncased": 768,
+ "use-cmlm-multilingual": 768,
+ "voyage-2": 1024,
+ "voyage-code-2": 1536,
+ "voyage-law-2": 1024,
+ "voyage-lite-01-instruct": 1024,
+ "voyage-lite-02-instruct": 1024,
+ "xlm-roberta-base": 768,
+ "xlm-roberta-large": 1024,
+}
+
# Maximum input sequence length (in tokens) for each external model, used to
# fill the "Max Tokens" leaderboard column. "N/A" marks models without a
# token-based limit (e.g. static word vectors).
# NOTE(review): values transcribed from model cards / provider docs — verify
# against the upstream source when adding entries.
EXTERNAL_MODEL_TO_SEQLEN = {
    "Cohere-embed-english-v3.0": 512,
    "Cohere-embed-multilingual-v3.0": 512,
    "Cohere-embed-multilingual-light-v3.0": 512,
    "all-MiniLM-L12-v2": 512,
    "all-MiniLM-L6-v2": 512,
    "all-mpnet-base-v2": 514,
    "allenai-specter": 512,
    "Baichuan-text-embedding": 512,
    "bert-base-10lang-cased": 512,
    "bert-base-15lang-cased": 512,
    "bert-base-25lang-cased": 512,
    "bert-base-multilingual-cased": 512,
    "bert-base-multilingual-uncased": 512,
    "bert-base-swedish-cased": 512,
    "bert-base-uncased": 512,
    "bge-base-zh-v1.5": 512,
    "bge-large-en-v1.5": 512,
    "bge-large-zh-v1.5": 512,
    "bge-large-zh-noinstruct": 512,
    "bge-small-zh-v1.5": 512,
    "camembert-base": 512,
    "camembert-large": 512,
    "contriever-base-msmarco": 512,
    "cross-en-de-roberta-sentence-transformer": 514,
    "distilbert-base-25lang-cased": 512,
    "distilbert-base-en-fr-cased": 512,
    "distilbert-base-en-fr-es-pt-it-cased": 512,
    "distilbert-base-fr-cased": 512,
    "distilbert-base-uncased": 512,
    "DanskBERT": 514,
    "dfm-encoder-large-v1": 512,
    "dfm-sentence-encoder-large-1": 512,
    "distiluse-base-multilingual-cased-v2": 512,
    "e5-base": 512,
    "e5-large": 512,
    "e5-mistral-7b-instruct": 32768,
    "e5-small": 512,
    "electra-small-nordic": 512,
    "electra-small-swedish-cased-discriminator": 512,
    "flaubert_base_cased": 512,
    "flaubert_base_uncased": 512,
    "flaubert_large_cased": 512,
    "gbert-base": 512,
    "gbert-large": 512,
    "gelectra-base": 512,
    "gelectra-large": 512,
    "google-gecko.text-embedding-preview-0409": 2048,
    "google-gecko-256.text-embedding-preview-0409": 2048,
    "gottbert-base": 512,
    "glove.6B.300d": "N/A",
    "gtr-t5-base": 512,
    "gtr-t5-large": 512,
    "gtr-t5-xl": 512,
    "gtr-t5-xxl": 512,
    "herbert-base-retrieval-v2": 514,
    "komninos": "N/A",
    "luotuo-bert-medium": 512,
    "LASER2": "N/A",
    "LaBSE": 512,
    "m3e-base": 512,
    "m3e-large": 512,
# "mistral-embed": "?",
    "msmarco-bert-co-condensor": 512,
    "multi-qa-MiniLM-L6-cos-v1": 512,
    "multilingual-e5-base": 514,
    "multilingual-e5-large": 514,
    "multilingual-e5-small": 512,
    "nb-bert-base": 512,
    "nb-bert-large": 512,
    "nomic-embed-text-v1.5-64": 8192,
    "nomic-embed-text-v1.5-128": 8192,
    "nomic-embed-text-v1.5-256": 8192,
    "nomic-embed-text-v1.5-512": 8192,
    "norbert3-base": 512,
    "norbert3-large": 512,
    "OpenSearch-text-hybrid": 512,
    "paraphrase-multilingual-MiniLM-L12-v2": 512,
    "paraphrase-multilingual-mpnet-base-v2": 514,
    "sentence-camembert-base": 512,
    "sentence-camembert-large": 512,
    "sentence-croissant-llm-base": 2048,
    "sentence-bert-swedish-cased": 512,
    "sentence-t5-base": 512,
    "sentence-t5-large": 512,
    "sentence-t5-xl": 512,
    "sentence-t5-xxl": 512,
    "silver-retriever-base-v1": 514,
    "sup-simcse-bert-base-uncased": 512,
    "st-polish-paraphrase-from-distilroberta": 514,
    "st-polish-paraphrase-from-mpnet": 514,
    "text2vec-base-chinese": 512,
    "text2vec-large-chinese": 512,
    "text-embedding-3-large": 8191,
    "text-embedding-3-large-256": 8191,
    "text-embedding-3-small": 8191,
    "text-embedding-ada-002": 8191,
    "text-similarity-ada-001": 2046,
    "text-similarity-babbage-001": 2046,
    "text-similarity-curie-001": 2046,
    "text-similarity-davinci-001": 2046,
    "text-search-ada-doc-001": 2046,
    "text-search-ada-query-001": 2046,
    "text-search-ada-001": 2046,
    "text-search-babbage-001": 2046,
    "text-search-curie-001": 2046,
    "text-search-davinci-001": 2046,
    "titan-embed-text-v1": 8000,
    "udever-bloom-1b1": 2048,
    "udever-bloom-560m": 2048,
    "universal-sentence-encoder-multilingual-3": 512,
    "universal-sentence-encoder-multilingual-large-3": 512,
    "use-cmlm-multilingual": 512,
    "unsup-simcse-bert-base-uncased": 512,
    "voyage-2": 1024,
    "voyage-code-2": 16000,
    "voyage-law-2": 4000,
    "voyage-lite-01-instruct": 4000,
    "voyage-lite-02-instruct": 4000,
    "xlm-roberta-base": 514,
    "xlm-roberta-large": 514,
}
+
# Parameter count (in millions) for each external model; used for the
# "Model Size (Million Parameters)" column and to derive the fp32 memory
# footprint. Models absent here get an empty size cell.
EXTERNAL_MODEL_TO_SIZE = {
    "allenai-specter": 110,
    "all-MiniLM-L12-v2": 33,
    "all-MiniLM-L6-v2": 23,
    "all-mpnet-base-v2": 110,
    "bert-base-10lang-cased": 138,
    "bert-base-15lang-cased": 138,
    "bert-base-25lang-cased": 138,
    "bert-base-multilingual-cased": 179,
    "bert-base-multilingual-uncased": 168,
    "bert-base-uncased": 110,
    "bert-base-swedish-cased": 125,
    "bge-base-zh-v1.5": 102,
    "bge-large-zh-v1.5": 326,
    "bge-large-zh-noinstruct": 326,
    "bge-small-zh-v1.5": 24,
    "camembert-base": 111,
    "camembert-large": 338,
    "cross-en-de-roberta-sentence-transformer": 278,
    "contriever-base-msmarco": 110,
    "distilbert-base-25lang-cased": 110,
    "distilbert-base-en-fr-cased": 110,
    "distilbert-base-en-fr-es-pt-it-cased": 110,
    "distilbert-base-fr-cased": 110,
    "distilbert-base-uncased": 110,
    "DanskBERT": 125,
    "distiluse-base-multilingual-cased-v2": 135,
    "dfm-encoder-large-v1": 355,
    "dfm-sentence-encoder-large-1": 355,
    "e5-base": 110,
    "e5-large": 335,
    "e5-mistral-7b-instruct": 7111,
    "e5-small": 33,
    "electra-small-nordic": 23,
    "electra-small-swedish-cased-discriminator": 16,
    "flaubert_base_cased": 138,
    "flaubert_base_uncased": 138,
    "flaubert_large_cased": 372,
    "gbert-base": 110,
    "gbert-large": 337,
    "gelectra-base": 110,
    "gelectra-large": 335,
    "glove.6B.300d": 120,
    "google-gecko.text-embedding-preview-0409": 1200,
    "google-gecko-256.text-embedding-preview-0409": 1200,
    "gottbert-base": 127,
    "gtr-t5-base": 110,
    "gtr-t5-large": 168,
    "gtr-t5-xl": 1240,
    "gtr-t5-xxl": 4865,
    "herbert-base-retrieval-v2": 125,
    "komninos": 134,
    "luotuo-bert-medium": 328,
    "LASER2": 43,
    "LaBSE": 471,
    "m3e-base": 102,
    # NOTE(review): same size as m3e-base — confirm against the model card.
    "m3e-large": 102,
    "msmarco-bert-co-condensor": 110,
    "multi-qa-MiniLM-L6-cos-v1": 23,
    "multilingual-e5-base": 278,
    "multilingual-e5-small": 118,
    "multilingual-e5-large": 560,
    "nb-bert-base": 179,
    "nb-bert-large": 355,
    "nomic-embed-text-v1.5-64": 138,
    "nomic-embed-text-v1.5-128": 138,
    "nomic-embed-text-v1.5-256": 138,
    "nomic-embed-text-v1.5-512": 138,
    "norbert3-base": 131,
    "norbert3-large": 368,
    "paraphrase-multilingual-mpnet-base-v2": 278,
    "paraphrase-multilingual-MiniLM-L12-v2": 118,
    "sentence-camembert-base": 110,
    "sentence-camembert-large": 337,
    "sentence-croissant-llm-base": 1280,
    "sentence-bert-swedish-cased": 125,
    "sentence-t5-base": 110,
    "sentence-t5-large": 168,
    "sentence-t5-xl": 1240,
    "sentence-t5-xxl": 4865,
    "silver-retriever-base-v1": 125,
    "sup-simcse-bert-base-uncased": 110,
    "st-polish-paraphrase-from-distilroberta": 125,
    "st-polish-paraphrase-from-mpnet": 125,
    "text2vec-base-chinese": 102,
    "text2vec-large-chinese": 326,
    "unsup-simcse-bert-base-uncased": 110,
    "use-cmlm-multilingual": 472,
    #"voyage-law-2": 1220,
    "voyage-lite-02-instruct": 1220,
    "xlm-roberta-base": 279,
    "xlm-roberta-large": 560,
}
# Closed-source, API-only embedding models (OpenAI, Cohere, Voyage, Google,
# Amazon, ...) that appear on the leaderboard.
PROPRIETARY_MODELS = {
    "Baichuan-text-embedding",
    "Cohere-embed-english-v3.0",
    "Cohere-embed-multilingual-light-v3.0",
    "Cohere-embed-multilingual-v3.0",
    "OpenSearch-text-hybrid",
    "google-gecko-256.text-embedding-preview-0409",
    "google-gecko.text-embedding-preview-0409",
    "mistral-embed",
    "text-embedding-3-large",
    "text-embedding-3-large-256",
    "text-embedding-3-small",
    "text-embedding-ada-002",
    "text-search-ada-001",
    "text-search-ada-doc-001",
    "text-search-ada-query-001",
    "text-search-babbage-001",
    "text-search-curie-001",
    "text-search-davinci-001",
    "text-similarity-ada-001",
    "text-similarity-babbage-001",
    "text-similarity-curie-001",
    "text-similarity-davinci-001",
    "titan-embed-text-v1",
    "voyage-2",
    "voyage-code-2",
    "voyage-law-2",
    "voyage-lite-01-instruct",
    "voyage-lite-02-instruct",
}
# Replace each raw name with a clickable markdown link; models without a
# dedicated link fall back to the leaderboard space itself.
PROPRIETARY_MODELS = {
    make_clickable_model(name, link=EXTERNAL_MODEL_TO_LINK.get(name, "https://huggingface.co/spaces/mteb/leaderboard"))
    for name in PROPRIETARY_MODELS
}

# Open-source models loadable directly with sentence-transformers.
# (get_mteb_data() also adds hub models that carry the sentence-transformers
# tag or ship a modules.json at runtime.)
# Fix: "allenai-specter" was listed twice; the duplicate has been removed
# (harmless in a set literal, but misleading to maintainers).
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
    "allenai-specter",
    "all-MiniLM-L12-v2",
    "all-MiniLM-L6-v2",
    "all-mpnet-base-v2",
    "bert-base-10lang-cased",
    "bert-base-15lang-cased",
    "bert-base-25lang-cased",
    "bert-base-multilingual-cased",
    "bert-base-multilingual-uncased",
    "bert-base-swedish-cased",
    "bert-base-uncased",
    "bge-base-zh-v1.5",
    "bge-large-zh-v1.5",
    "bge-large-zh-noinstruct",
    "bge-small-zh-v1.5",
    "camembert-base",
    "camembert-large",
    "contriever-base-msmarco",
    "cross-en-de-roberta-sentence-transformer",
    "DanskBERT",
    "distilbert-base-25lang-cased",
    "distilbert-base-en-fr-cased",
    "distilbert-base-en-fr-es-pt-it-cased",
    "distilbert-base-fr-cased",
    "distilbert-base-uncased",
    "distiluse-base-multilingual-cased-v2",
    "dfm-encoder-large-v1",
    "dfm-sentence-encoder-large-1",
    "e5-base",
    "e5-large",
    "e5-mistral-7b-instruct",
    "e5-small",
    "electra-small-nordic",
    "electra-small-swedish-cased-discriminator",
    "flaubert_base_cased",
    "flaubert_base_uncased",
    "flaubert_large_cased",
    "gbert-base",
    "gbert-large",
    "gelectra-base",
    "gelectra-large",
    "glove.6B.300d",
    "gottbert-base",
    "gtr-t5-base",
    "gtr-t5-large",
    "gtr-t5-xl",
    "gtr-t5-xxl",
    "herbert-base-retrieval-v2",
    "komninos",
    "luotuo-bert-medium",
    "LaBSE",
    "m3e-base",
    "m3e-large",
    "msmarco-bert-co-condensor",
    "multi-qa-MiniLM-L6-cos-v1",
    "multilingual-e5-base",
    "multilingual-e5-large",
    "multilingual-e5-small",
    "nb-bert-base",
    "nb-bert-large",
    "nomic-embed-text-v1.5-64",
    "nomic-embed-text-v1.5-128",
    "nomic-embed-text-v1.5-256",
    "nomic-embed-text-v1.5-512",
    "norbert3-base",
    "norbert3-large",
    "paraphrase-multilingual-mpnet-base-v2",
    "paraphrase-multilingual-MiniLM-L12-v2",
    "sentence-camembert-base",
    "sentence-camembert-large",
    "sentence-croissant-llm-base",
    "sentence-bert-swedish-cased",
    "sentence-t5-base",
    "sentence-t5-large",
    "sentence-t5-xl",
    "sentence-t5-xxl",
    "silver-retriever-base-v1",
    "sup-simcse-bert-base-uncased",
    "st-polish-paraphrase-from-distilroberta",
    "st-polish-paraphrase-from-mpnet",
    "text2vec-base-chinese",
    "text2vec-large-chinese",
    "udever-bloom-1b1",
    "udever-bloom-560m",
    "universal-sentence-encoder-multilingual-3",
    "universal-sentence-encoder-multilingual-large-3",
    "unsup-simcse-bert-base-uncased",
    "use-cmlm-multilingual",
    "xlm-roberta-base",
    "xlm-roberta-large",
}
# Replace raw names with clickable markdown links for display.
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
    for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
}

# Hub repos excluded from the leaderboard: duplicates, quantized/converted
# re-uploads (ct2fast, onnx, ggml, mlx, int8), empty repos, and deprecated
# checkpoints.
# Fix: "michaelfeil/ct2fast-bge-large-en-v1.5" was listed twice; the
# duplicate entry has been removed.
MODELS_TO_SKIP = {
    "baseplate/instructor-large-1", # Duplicate
    "radames/e5-large", # Duplicate
    "gentlebowl/instructor-large-safetensors", # Duplicate
    "Consensus/instructor-base", # Duplicate
    "GovCompete/instructor-xl", # Duplicate
    "GovCompete/e5-large-v2", # Duplicate
    "t12e/instructor-base", # Duplicate
    "michaelfeil/ct2fast-e5-large-v2",
    "michaelfeil/ct2fast-e5-large",
    "michaelfeil/ct2fast-e5-small-v2",
    "newsrx/instructor-xl-newsrx",
    "newsrx/instructor-large-newsrx",
    "fresha/e5-large-v2-endpoint",
    "ggrn/e5-small-v2",
    "michaelfeil/ct2fast-e5-small",
    "jncraton/e5-small-v2-ct2-int8",
    "anttip/ct2fast-e5-small-v2-hfie",
    "newsrx/instructor-large",
    "newsrx/instructor-xl",
    "dmlls/all-mpnet-base-v2",
    "cgldo/semanticClone",
    "Malmuk1/e5-large-v2_Sharded",
    "jncraton/gte-small-ct2-int8",
    "Einas/einas_ashkar",
    "gruber/e5-small-v2-ggml",
    "jncraton/bge-small-en-ct2-int8",
    "vectoriseai/bge-small-en",
    "recipe/embeddings",
    "dhairya0907/thenlper-get-large",
    "Narsil/bge-base-en",
    "kozistr/fused-large-en",
    "sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
    "sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
    "BAAI/bge-large-en", # Deprecated in favor of v1.5
    "BAAI/bge-base-en", # Deprecated in favor of v1.5
    "BAAI/bge-small-en", # Deprecated in favor of v1.5
    "d0rj/e5-large-en-ru",
    "d0rj/e5-base-en-ru",
    "d0rj/e5-small-en-ru",
    "aident-ai/bge-base-en-onnx",
    "barisaydin/bge-base-en",
    "barisaydin/gte-large",
    "barisaydin/gte-base",
    "barisaydin/gte-small",
    "barisaydin/bge-small-en",
    "odunola/e5-base-v2",
    "goldenrooster/multilingual-e5-large",
    "davidpeer/gte-small",
    "barisaydin/bge-large-en",
    "jamesgpt1/english-large-v1",
    "vectoriseai/bge-large-en-v1.5",
    "vectoriseai/bge-base-en-v1.5",
    "vectoriseai/instructor-large",
    "vectoriseai/instructor-base",
    "vectoriseai/gte-large",
    "vectoriseai/gte-base",
    "vectoriseai/e5-large-v2",
    "vectoriseai/bge-small-en-v1.5",
    "vectoriseai/e5-base-v2",
    "vectoriseai/e5-large",
    "vectoriseai/multilingual-e5-large",
    "vectoriseai/gte-small",
    "vectoriseai/ember-v1",
    "vectoriseai/e5-base",
    "vectoriseai/e5-small-v2",
    "michaelfeil/ct2fast-bge-large-en-v1.5",
    "michaelfeil/ct2fast-bge-base-en-v1.5",
    "michaelfeil/ct2fast-gte-large",
    "michaelfeil/ct2fast-gte-base",
    "michaelfeil/ct2fast-bge-small-en-v1.5",
    "rizki/bgr-tf",
    "ef-zulla/e5-multi-sml-torch",
    "cherubhao/yogamodel",
    "morgendigital/multilingual-e5-large-quantized",
    "jncraton/gte-tiny-ct2-int8",
    "Research2NLP/electrical_stella",
    "Intel/bge-base-en-v1.5-sts-int8-static",
    "Intel/bge-base-en-v1.5-sts-int8-dynamic",
    "Intel/bge-base-en-v1.5-sst2",
    "Intel/bge-base-en-v1.5-sst2-int8-static",
    "Intel/bge-base-en-v1.5-sst2-int8-dynamic",
    "Intel/bge-small-en-v1.5-sst2",
    "Intel/bge-small-en-v1.5-sst2-int8-dynamic",
    "Intel/bge-small-en-v1.5-sst2-int8-static",
    "binqiangliu/EmbeddingModlebgelargeENv1.5",
    "DecisionOptimizationSystem/DeepFeatEmbeddingLargeContext",
    "woody72/multilingual-e5-base",
    "Severian/embed",
    "Frazic/udever-bloom-3b-sentence",
    "jamesgpt1/zzz",
    "karrar-alwaili/UAE-Large-V1",
    "odunola/UAE-Large-VI",
    "shubham-bgi/UAE-Large",
    "retrainai/instructor-xl",
    "weakit-v/bge-base-en-v1.5-onnx",
    "ieasybooks/multilingual-e5-large-onnx",
    "gizmo-ai/Cohere-embed-multilingual-v3.0",
    "jingyeom/korean_embedding_model",
    "barisaydin/text2vec-base-multilingual",
    "mlx-community/multilingual-e5-large-mlx",
    "mlx-community/multilingual-e5-base-mlx",
    "mlx-community/multilingual-e5-small-mlx",
    "maiyad/multilingual-e5-small",
    "khoa-klaytn/bge-base-en-v1.5-angle",
    "khoa-klaytn/bge-small-en-v1.5-angle",
    "mixamrepijey/instructor-small",
    "mixamrepijey/instructor-models",
    "lsf1000/bge-evaluation", # Empty
    "giulio98/placeholder", # Empty
    "Severian/nomic", # Copy
    "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy
    "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy
    "TitanML/jina-v2-base-en-embed", # Copy
    "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant
    "Geolumina/instructor-xl", # Duplicate
    "krilecy/e5-mistral-7b-instruct",
    "beademiguelperez/sentence-transformers-multilingual-e5-small",
    "arcdev/SFR-Embedding-Mistral",
    "arcdev/e5-mistral-7b-instruct",
    "Koat/gte-tiny",
    "SmartComponents/bge-micro-v2",
}
def add_lang(examples):
    """Add a "mteb_dataset_name_with_lang" field to a results row.

    The new field is the dataset name, suffixed with the evaluation language
    in parentheses when one is present (e.g. "Tatoeba (fra-eng)").
    """
    name = examples["mteb_dataset_name"]
    lang = examples["eval_language"]
    examples["mteb_dataset_name_with_lang"] = f"{name} ({lang})" if lang else name
    return examples
+
def norm(names):
    """Strip language annotations: keep only the first whitespace-separated
    token of each name (e.g. "BUCC (de-en)" -> "BUCC"), deduplicated."""
    return {name.split(" ")[0] for name in names}
+
def add_task(examples):
    """Tag a results row with its MTEB task type, derived from the dataset name."""
    # Could be added to the dataset loading script instead
    dataset = examples["mteb_dataset_name"]
    # Ordered (task, dataset list) lookup; the first match wins, mirroring
    # the original if/elif chain.
    lookup = [
        ("Classification", TASK_LIST_CLASSIFICATION + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH),
        ("Clustering", TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_FR + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH),
        ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH),
        ("Reranking", TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH),
        ("Retrieval", TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_RETRIEVAL_LAW),
        ("STS", TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH),
        ("Summarization", TASK_LIST_SUMMARIZATION + TASK_LIST_SUMMARIZATION_FR),
        ("BitextMining", TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA),
    ]
    for task, task_datasets in lookup:
        if dataset in norm(task_datasets):
            examples["mteb_task"] = task
            break
    else:
        print("WARNING: Task not found for dataset", dataset)
        examples["mteb_task"] = "Unknown"
    return examples
+
# Load cached external (API) model scores from disk when available, so only
# models missing from the cache are re-fetched from the mteb/results dataset.
if os.path.exists("EXTERNAL_MODEL_RESULTS.json"):
    with open("EXTERNAL_MODEL_RESULTS.json") as f:
        EXTERNAL_MODEL_RESULTS = json.load(f)
    # Update with models not contained
    models_to_run = []
    for model in EXTERNAL_MODELS:
        if model not in EXTERNAL_MODEL_RESULTS:
            models_to_run.append(model)
            # Pre-seed the nested {task: {metric: []}} structure for each new model.
            EXTERNAL_MODEL_RESULTS[model] = {k: {v: []} for k, v in TASK_TO_METRIC.items()}
else:
    # No cache yet: fetch results for every external model.
    EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
    models_to_run = EXTERNAL_MODELS

pbar = tqdm(models_to_run, desc="Fetching external model results")
for model in pbar:
    pbar.set_description(f"Fetching external model results for {model!r}")
    # Each external model is a separate config of the mteb/results dataset.
    ds = load_dataset("mteb/results", model, trust_remote_code=True)
    # For local debugging:
    #, download_mode='force_redownload', verification_mode="no_checks")
    ds = ds.map(add_lang)
    ds = ds.map(add_task)
    base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
    # For now only one metric per task - Could add more metrics lateron
    for task, metric in TASK_TO_METRIC.items():
        # Collect {dataset-with-lang: rounded score} for this task/metric pair.
        ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
        ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
        EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})

# Save & cache EXTERNAL_MODEL_RESULTS
with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
    json.dump(EXTERNAL_MODEL_RESULTS, f)
+
def get_dim_seq_size(model):
    """Extract display metadata for a Hub model without downloading weights.

    Args:
        model: a ``huggingface_hub`` ``ModelInfo`` object (must expose
            ``siblings`` and ``modelId``).

    Returns:
        Tuple ``(dim, seq, parameters, memory)``: embedding dimension and max
        sequence length ("" when undeterminable), parameter count in millions,
        and fp32 memory in GB.

    Fixes: regex patterns are now raw strings (``"\\d"`` in a plain string is
    an invalid escape sequence), and config files are opened via ``with`` so
    handles are always closed.
    """
    filenames = [sib.rfilename for sib in model.siblings]
    dim, seq = "", ""
    # sentence-transformers pooling config carries the embedding dimension.
    for filename in filenames:
        if re.match(r"\d+_Pooling/config.json", filename):
            st_config_path = hf_hub_download(model.modelId, filename=filename)
            with open(st_config_path) as f:
                dim = json.load(f).get("word_embedding_dimension", "")
            break
    # An optional Dense head projects to a different output dimension.
    for filename in filenames:
        if re.match(r"\d+_Dense/config.json", filename):
            st_config_path = hf_hub_download(model.modelId, filename=filename)
            with open(st_config_path) as f:
                dim = json.load(f).get("out_features", dim)
    if "config.json" in filenames:
        config_path = hf_hub_download(model.modelId, filename="config.json")
        with open(config_path) as f:
            config = json.load(f)
        if not dim:
            dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", "")))
        seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", ""))))
    # Get model file size without downloading. Parameters in million parameters and memory in GB
    parameters, memory = get_model_parameters_memory(model)
    return dim, seq, parameters, memory
+
def make_datasets_clickable(df):
"""Does not work"""
if "BornholmBitextMining" in df.columns:
@@ -43,6 +1266,405 @@ def make_datasets_clickable(df):
columns={f'BornholmBitextMining': 'BornholmBitextMining',})
return df
def add_rank(df):
    """Sort a leaderboard frame by score and prepend a 1-based "Rank" column.

    When more than one score column exists, an "Average" column (NaN if any
    score is missing) is inserted just before the score columns and used for
    sorting; with a single score column that column sorts directly. Scores
    are rounded to 2 decimals and remaining NaNs blanked out.
    """
    meta_cols = {"Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens"}
    score_cols = [c for c in df.columns if c not in meta_cols]
    if len(score_cols) > 1:
        df.insert(len(df.columns) - len(score_cols), "Average", df[score_cols].mean(axis=1, skipna=False))
        sort_by = "Average"
    else:
        sort_by = score_cols[0]
    df.sort_values(sort_by, ascending=False, inplace=True)
    df.insert(0, "Rank", list(range(1, len(df) + 1)))
    df = df.round(2)
    # Fill NaN after averaging
    df.fillna("", inplace=True)
    return df
+
def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
    """Build a leaderboard DataFrame for the given tasks/languages/datasets.

    Merges cached external (API) model scores with results parsed from the
    model-index metadata of Hub models tagged "mteb". ``datasets`` takes
    precedence over ``langs`` when both are given.

    NOTE(review): the mutable default arguments (lists) are shared across
    calls; safe here since they are only read, never mutated — but keep in
    mind when editing.
    """
    api = HfApi()
    models = api.list_models(filter="mteb")
    # Initialize list to models that we cannot fetch metadata from
    df_list = []
    # --- External (API) models: read from the pre-fetched cache ---
    for model in EXTERNAL_MODEL_RESULTS:
        results_list = [res for task in tasks for res in EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]]
        if len(datasets) > 0:
            res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
        elif langs:
            # Would be cleaner to rely on an extra language column instead
            langs_format = [f"({lang})" for lang in langs]
            # Keeps keys whose last token is the key itself (no language
            # suffix, e.g. "Model" or single-word datasets) or matches a
            # requested "(lang)" suffix.
            res = {k: v for d in results_list for k, v in d.items() if any([k.split(" ")[-1] in (k, x) for x in langs_format])}
        else:
            res = {k: v for d in results_list for k, v in d.items()}
        # Model & at least one result
        if len(res) > 1:
            if add_emb_dim:
                res["Model Size (Million Parameters)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
                res["Memory Usage (GB, fp32)"] = round(res["Model Size (Million Parameters)"] * 1e6 * 4 / 1024**3, 2) if res["Model Size (Million Parameters)"] != "" else ""
                res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
                res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
            df_list.append(res)

    # --- Hub models: parse scores from README model-index metadata ---
    for model in models:
        if model.modelId in MODELS_TO_SKIP: continue
        print("MODEL", model)
        readme_path = hf_hub_download(model.modelId, filename="README.md")
        meta = metadata_load(readme_path)
        if "model-index" not in meta:
            continue
        # meta['model-index'][0]["results"] is list of elements like:
        # {
        #    "task": {"type": "Classification"},
        #    "dataset": {
        #        "type": "mteb/amazon_massive_intent",
        #        "name": "MTEB MassiveIntentClassification (nb)",
        #        "config": "nb",
        #        "split": "test",
        #    },
        #    "metrics": [
        #        {"type": "accuracy", "value": 39.81506388702084},
        #        {"type": "f1", "value": 38.809586587791664},
        #    ],
        # },
        # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
        if len(datasets) > 0:
            task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
        elif langs:
            task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
        else:
            task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks)]
        # One {dataset name: score} pair per result, keeping only the metric
        # configured for that task type.
        out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
        out = {k: v for d in out for k, v in d.items()}
        out["Model"] = make_clickable_model(model.modelId)
        # Model & at least one result
        if len(out) > 1:
            if add_emb_dim:
                try:
                    # Fails on gated repos, so we only include scores for them
                    out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (Million Parameters)"], out["Memory Usage (GB, fp32)"] = get_dim_seq_size(model)
                except:
                    pass
            df_list.append(out)
            if model.library_name == "sentence-transformers" or "sentence-transformers" in model.tags or "modules.json" in {file.rfilename for file in model.siblings}:
                SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS.add(out["Model"])
    df = pd.DataFrame(df_list)
    # If there are any models that are the same, merge them
    # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
    df = df.groupby("Model", as_index=False).first()
    # Put 'Model' column first
    cols = sorted(list(df.columns))
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]
    if rank:
        df = add_rank(df)
    if fillna:
        df.fillna("", inplace=True)
    return df
+
def get_mteb_average():
    """Compute the overall English leaderboard plus per-category tables.

    Populates the module-level DATA_* globals (overall and one frame per task
    category) and returns DATA_OVERALL.
    """
    global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION
    DATA_OVERALL = get_mteb_data(
        tasks=[
            "Classification",
            "Clustering",
            "PairClassification",
            "Reranking",
            "Retrieval",
            "STS",
            "Summarization",
        ],
        datasets=TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION,
        fillna=False,
        add_emb_dim=True,
        rank=False,
    )
    # Debugging:
    # DATA_OVERALL.to_csv("overall.csv")

    # skipna=False: a model must have scores for every dataset of a category
    # to receive that category average.
    DATA_OVERALL.insert(1, f"Average ({len(TASK_LIST_EN)} datasets)", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(7, f"STS Average ({len(TASK_LIST_STS)} datasets)", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
    DATA_OVERALL.sort_values(f"Average ({len(TASK_LIST_EN)} datasets)", ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))

    DATA_OVERALL = DATA_OVERALL.round(2)

    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION])
    # Only keep rows with at least one score in addition to the "Model" & rank column
    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 4:].ne("").any(axis=1)]

    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING])
    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 4:].ne("").any(axis=1)]

    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION])
    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 4:].ne("").any(axis=1)]

    DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING])
    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 4:].ne("").any(axis=1)]

    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL])
    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 4:].ne("").any(axis=1)]

    DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS])
    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 4:].ne("").any(axis=1)]

    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_SUMMARIZATION])
    # NOTE(review): uses iloc[:, 1:] unlike the other splits (4:) — confirm intentional.
    DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]

    # Fill NaN after averaging
    DATA_OVERALL.fillna("", inplace=True)

    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
    DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL
+
def get_mteb_average_zh():
    """Build the Chinese (C-MTEB) overall leaderboard and its per-task boards.

    Side effects: rebinds the module-level DATA_*_ZH dataframes.
    Returns the ranked overall dataframe (DATA_OVERALL_ZH).
    """
    global DATA_OVERALL_ZH, DATA_CLASSIFICATION_ZH, DATA_CLUSTERING_ZH, DATA_PAIR_CLASSIFICATION_ZH, DATA_RERANKING_ZH, DATA_RETRIEVAL_ZH, DATA_STS_ZH
    DATA_OVERALL_ZH = get_mteb_data(
        tasks=[
            "Classification",
            "Clustering",
            "PairClassification",
            "Reranking",
            "Retrieval",
            "STS",
        ],
        datasets=TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH,
        fillna=False,
        add_emb_dim=True,
        rank=False,
    )
    # Debugging:
    # DATA_OVERALL_ZH.to_csv("overall.csv")

    # Per-category averages, inserted right after the "Model" column.
    # skipna=False: a model only gets an average when it has every score in the category.
    averages = [
        ("Average", TASK_LIST_ZH),
        ("Classification Average", TASK_LIST_CLASSIFICATION_ZH),
        ("Clustering Average", TASK_LIST_CLUSTERING_ZH),
        ("Pair Classification Average", TASK_LIST_PAIR_CLASSIFICATION_ZH),
        ("Reranking Average", TASK_LIST_RERANKING_ZH),
        ("Retrieval Average", TASK_LIST_RETRIEVAL_ZH),
        ("STS Average", TASK_LIST_STS_ZH),
    ]
    for position, (label, task_list) in enumerate(averages, start=1):
        DATA_OVERALL_ZH.insert(position, f"{label} ({len(task_list)} datasets)", DATA_OVERALL_ZH[task_list].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.sort_values(f"Average ({len(TASK_LIST_ZH)} datasets)", ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL_ZH.insert(0, "Rank", list(range(1, len(DATA_OVERALL_ZH) + 1)))

    DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)

    def _board(task_list):
        # Ranked per-task board; keep only rows with at least one score
        # (columns 0-3 are the Rank / Model / size / memory metadata).
        board = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + task_list])
        return board[board.iloc[:, 4:].ne("").any(axis=1)]

    DATA_CLASSIFICATION_ZH = _board(TASK_LIST_CLASSIFICATION_ZH)
    DATA_CLUSTERING_ZH = _board(TASK_LIST_CLUSTERING_ZH)
    DATA_PAIR_CLASSIFICATION_ZH = _board(TASK_LIST_PAIR_CLASSIFICATION_ZH)
    DATA_RERANKING_ZH = _board(TASK_LIST_RERANKING_ZH)
    DATA_RETRIEVAL_ZH = _board(TASK_LIST_RETRIEVAL_ZH)
    DATA_STS_ZH = _board(TASK_LIST_STS_ZH)

    # Fill NaN only after the averages above were computed.
    DATA_OVERALL_ZH.fillna("", inplace=True)

    overall_columns = [
        "Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)",
        "Embedding Dimensions", "Max Tokens",
    ] + [f"{label} ({len(task_list)} datasets)" for label, task_list in averages]
    DATA_OVERALL_ZH = DATA_OVERALL_ZH[overall_columns]
    DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL_ZH
+
def get_mteb_average_fr():
    """Build the French (F-MTEB) overall leaderboard and its per-task boards.

    Side effects: rebinds the module-level DATA_*_FR dataframes.
    Returns the ranked overall dataframe (DATA_OVERALL_FR).
    """
    global DATA_OVERALL_FR, DATA_CLASSIFICATION_FR, DATA_CLUSTERING_FR, DATA_PAIR_CLASSIFICATION_FR, DATA_RERANKING_FR, DATA_RETRIEVAL_FR, DATA_STS_FR, DATA_SUMMARIZATION_FR
    DATA_OVERALL_FR = get_mteb_data(
        tasks=[
            "Classification",
            "Clustering",
            "PairClassification",
            "Reranking",
            "Retrieval",
            "STS",
            "Summarization",
        ],
        datasets=TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR,
        fillna=False,
        add_emb_dim=True,
        rank=False,
    )
    # Debugging:
    # DATA_OVERALL_FR.to_csv("overall.csv")

    # Per-category averages, inserted right after the "Model" column.
    # skipna=False: a model only gets an average when it has every score in the category.
    DATA_OVERALL_FR.insert(1, f"Average ({len(TASK_LIST_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLASSIFICATION_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLUSTERING_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_PAIR_CLASSIFICATION_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RERANKING_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RETRIEVAL_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(7, f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_STS_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)", DATA_OVERALL_FR[TASK_LIST_SUMMARIZATION_FR].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.sort_values(f"Average ({len(TASK_LIST_FR)} datasets)", ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
    DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)

    def _board(task_list):
        # Ranked per-task board; keep only rows with at least one score
        # (columns 0-3 are the Rank / Model / size / memory metadata).
        board = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + task_list])
        return board[board.iloc[:, 4:].ne("").any(axis=1)]

    DATA_CLASSIFICATION_FR = _board(TASK_LIST_CLASSIFICATION_FR)
    DATA_CLUSTERING_FR = _board(TASK_LIST_CLUSTERING_FR)
    DATA_PAIR_CLASSIFICATION_FR = _board(TASK_LIST_PAIR_CLASSIFICATION_FR)
    DATA_RERANKING_FR = _board(TASK_LIST_RERANKING_FR)
    DATA_RETRIEVAL_FR = _board(TASK_LIST_RETRIEVAL_FR)
    DATA_STS_FR = _board(TASK_LIST_STS_FR)
    # Bug fix: this board previously filtered on iloc[:, 1:], which includes the
    # never-empty "Model" column and therefore kept every row. Use the same
    # score-only slice (iloc[:, 4:]) as every other task board.
    DATA_SUMMARIZATION_FR = _board(TASK_LIST_SUMMARIZATION_FR)

    # Fill NaN only after the averages above were computed.
    DATA_OVERALL_FR.fillna("", inplace=True)

    DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]]
    DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL_FR
+
def get_mteb_average_pl():
    """Build the Polish overall leaderboard and its per-task boards.

    Side effects: rebinds the module-level DATA_*_PL dataframes.
    Returns the ranked overall dataframe (DATA_OVERALL_PL).
    """
    global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
    DATA_OVERALL_PL = get_mteb_data(
        tasks=[
            "Classification",
            "Clustering",
            "PairClassification",
            "Retrieval",
            "STS",
        ],
        datasets=TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL,
        fillna=False,
        add_emb_dim=True,
        rank=False,
    )
    # Debugging:
    # DATA_OVERALL_PL.to_csv("overall.csv")

    # Per-category averages, inserted right after the "Model" column.
    # skipna=False: a model only gets an average when it has every score in the category.
    averages = [
        ("Average", TASK_LIST_PL),
        ("Classification Average", TASK_LIST_CLASSIFICATION_PL),
        ("Clustering Average", TASK_LIST_CLUSTERING_PL),
        ("Pair Classification Average", TASK_LIST_PAIR_CLASSIFICATION_PL),
        ("Retrieval Average", TASK_LIST_RETRIEVAL_PL),
        ("STS Average", TASK_LIST_STS_PL),
    ]
    for position, (label, task_list) in enumerate(averages, start=1):
        DATA_OVERALL_PL.insert(position, f"{label} ({len(task_list)} datasets)", DATA_OVERALL_PL[task_list].mean(axis=1, skipna=False))
    DATA_OVERALL_PL.sort_values(f"Average ({len(TASK_LIST_PL)} datasets)", ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL_PL.insert(0, "Rank", list(range(1, len(DATA_OVERALL_PL) + 1)))

    DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)

    def _board(task_list):
        # Ranked per-task board; keep only rows with at least one score
        # (columns 0-3 are the Rank / Model / size / memory metadata).
        board = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + task_list])
        return board[board.iloc[:, 4:].ne("").any(axis=1)]

    DATA_CLASSIFICATION_PL = _board(TASK_LIST_CLASSIFICATION_PL)
    DATA_CLUSTERING_PL = _board(TASK_LIST_CLUSTERING_PL)
    DATA_PAIR_CLASSIFICATION_PL = _board(TASK_LIST_PAIR_CLASSIFICATION_PL)
    DATA_RETRIEVAL_PL = _board(TASK_LIST_RETRIEVAL_PL)
    DATA_STS_PL = _board(TASK_LIST_STS_PL)

    # Fill NaN only after the averages above were computed.
    DATA_OVERALL_PL.fillna("", inplace=True)

    overall_columns = [
        "Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)",
        "Embedding Dimensions", "Max Tokens",
    ] + [f"{label} ({len(task_list)} datasets)" for label, task_list in averages]
    DATA_OVERALL_PL = DATA_OVERALL_PL[overall_columns]
    DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL_PL
+
# Materialize the overall leaderboards; each call also fills its per-task globals.
get_mteb_average()
get_mteb_average_fr()
get_mteb_average_pl()
get_mteb_average_zh()

# Boards that only exist as a single task/language combination.
_META_COLS = ["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"]
DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)[_META_COLS + ["Average"] + TASK_LIST_BITEXT_MINING]
# NOTE: the Danish bitext board intentionally has no "Average" column.
DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)[_META_COLS + TASK_LIST_BITEXT_MINING_DA]
DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)[_META_COLS + ["Average"] + TASK_LIST_CLASSIFICATION_DA]
DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)[_META_COLS + ["Average"] + TASK_LIST_CLASSIFICATION_NB]
DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)[_META_COLS + ["Average"] + TASK_LIST_CLASSIFICATION_SV]
DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)[_META_COLS + ["Average"] + TASK_LIST_CLASSIFICATION_OTHER]
DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[_META_COLS + ["Average"] + TASK_LIST_CLUSTERING_DE]
DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[_META_COLS + ["Average"] + TASK_LIST_STS_OTHER]
DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[_META_COLS + ["Average"] + TASK_LIST_RETRIEVAL_LAW]
+
# Exact counts of scores / datasets / models across every board shown in the UI.
NUM_SCORES = 0
DATASETS = []
MODELS = []
for board in (
    DATA_BITEXT_MINING,
    DATA_BITEXT_MINING_DA,
    DATA_CLASSIFICATION_EN,
    DATA_CLASSIFICATION_DA,
    DATA_CLASSIFICATION_FR,
    DATA_CLASSIFICATION_NB,
    DATA_CLASSIFICATION_PL,
    DATA_CLASSIFICATION_SV,
    DATA_CLASSIFICATION_ZH,
    DATA_CLASSIFICATION_OTHER,
    DATA_CLUSTERING,
    DATA_CLUSTERING_DE,
    DATA_CLUSTERING_FR,
    DATA_CLUSTERING_PL,
    DATA_CLUSTERING_ZH,
    DATA_PAIR_CLASSIFICATION,
    DATA_PAIR_CLASSIFICATION_FR,
    DATA_PAIR_CLASSIFICATION_PL,
    DATA_PAIR_CLASSIFICATION_ZH,
    DATA_RERANKING,
    DATA_RERANKING_FR,
    DATA_RERANKING_ZH,
    DATA_RETRIEVAL,
    DATA_RETRIEVAL_FR,
    DATA_RETRIEVAL_PL,
    DATA_RETRIEVAL_ZH,
    DATA_RETRIEVAL_LAW,
    DATA_STS_EN,
    DATA_STS_FR,
    DATA_STS_PL,
    DATA_STS_ZH,
    DATA_STS_OTHER,
    DATA_SUMMARIZATION,
    DATA_SUMMARIZATION_FR,
):
    # Boards with an "Average" column carry one extra leading column before the
    # per-dataset scores; skip the metadata columns when counting.
    skip = 4 if "Average" in board.columns else 3
    # Count only non-NaN score cells.
    NUM_SCORES += board.iloc[:, skip:].notna().sum().sum()
    # First token of a column name is the dataset; language variants of the
    # same dataset therefore collapse into one entry.
    DATASETS += [col.split(" ")[0] for col in board.columns[skip:]]
    MODELS += list(board["Model"])

NUM_DATASETS = len(set(DATASETS))
NUM_MODELS = len(set(MODELS))
# 1. Force headers to wrap
# 2. Force model column (maximum) width
@@ -72,98 +1694,323 @@ Each inner tab can have the following keys:
- language_long: [optional] The long form of the language
- description: The description of the leaderboard
- credits: [optional] The credits for the leaderboard
-- desc: [optional] The description of the leaderboard
- data: The data for the leaderboard
+- refresh: The function to refresh the leaderboard
"""
-# No more refreshing manually, happens daily
-# def get_refresh_function(task_category, task_list):
-# def _refresh():
-# data_task_category = get_mteb_data(tasks=[task_category], datasets=task_list)
-# data_task_category.drop(columns=["Embedding Dimensions", "Max Tokens"], inplace=True)
-# return data_task_category
-# return _refresh
-
-
-# def get_refresh_overall_function(tasks):
-# return lambda: get_mteb_average(tasks)[0]
-
-
-# load in the pre-calculated `all_data_tasks` and `boards_data`
-print(f"Loading pre-calculated data....")
-all_data_tasks = load_results("all_data_tasks")
-boards_data = load_results("boards_data")
-
-#### Caclulate Metadata
-# Exact, add all non-nan integer values for every dataset
-NUM_SCORES = 0
-DATASETS = []
-MODELS = []
-# LANGUAGES = []
-for d in all_data_tasks:
- if isinstance(d, list) and len(d) == 0:
- continue
- # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
- cols_to_ignore = 4 if "Average" in d.columns else 3
- # Count number of scores including only non-nan floats & excluding the rank column
- NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum()
- # Exclude rank & model name column (first two); Do not count different language versions as different datasets
- DATASETS += [i.split(" ")[0] for i in d.columns[cols_to_ignore:]]
- # LANGUAGES += [i.split(" ")[-1] for i in d.columns[cols_to_ignore:]]
- MODELS += d["Model"].tolist()
-
-
-NUM_DATASETS = len(set(DATASETS))
-# NUM_LANGUAGES = len(set(LANGUAGES))
-NUM_MODELS = len(set(MODELS))
# Per-language attribution strings shown in the leaderboard tab descriptions.
chinese_credits = "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)"
danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
# The Norwegian boards credit the same Scandinavian Embedding Benchmark authors.
norwegian_credits = danish_credits
french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [Wissam Siblini](https://github.com/wissam-sib), [Mathieu Ciancone](https://github.com/MathieuCiancone), [Marion Schaeffer](https://github.com/schmarion)"
polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)"
# Leaderboard configuration: one top-level entry per task category. Each entry
# holds the metric description and a list of per-language tabs; a tab bundles
# the UI description/credits, the dataframe to show, and a zero-argument
# "refresh" callable that recomputes it.
data = {
    "Overall": {
        "metric": "Various, refer to task tabs",
        "data": [
            {
                "language": "English",
                "description": "**Overall MTEB English leaderboard** 🔮",
                "data": DATA_OVERALL,
                "refresh": get_mteb_average,
            },
            {
                "language": "Chinese",
                "data": DATA_OVERALL_ZH,
                "description": "**Overall MTEB Chinese leaderboard (C-MTEB)** 🔮🇨🇳",
                "credits": chinese_credits,
                "refresh": get_mteb_average_zh,
            },
            {
                "language": "French",
                "data": DATA_OVERALL_FR,
                "description": "**Overall MTEB French leaderboard (F-MTEB)** 🔮🇫🇷",
                "credits": french_credits,
                "refresh": get_mteb_average_fr,
            },
            {
                "language": "Polish",
                "data": DATA_OVERALL_PL,
                "description": "**Overall MTEB Polish leaderboard** 🔮🇵🇱",
                "refresh": get_mteb_average_pl,
            },
        ]
    },
    "Bitext Mining": {
        "metric": "[F1](https://huggingface.co/spaces/evaluate-metric/f1)",
        "data": [
            {
                "language": "English-X",
                "language_long": "117 (Pairs of: English & other language)",
                "description": "**Bitext Mining English-X Leaderboard** 🎌",
                "data": DATA_BITEXT_MINING,
                "refresh": partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING),
            },
            {
                "language": "Danish",
                "language_long": "Danish & Bornholmsk (Danish Dialect)",
                "description": "**Bitext Mining Danish Leaderboard** 🎌🇩🇰",
                "credits": danish_credits,
                "data": DATA_BITEXT_MINING_DA,
                "refresh": partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING_DA),
            }
        ]
    },
    "Classification": {
        "metric": "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)",
        "data": [
            {
                "language": "English",
                "description": "**Classification English Leaderboard** ❤️",
                "data": DATA_CLASSIFICATION_EN,
                "refresh": partial(get_mteb_data, tasks=["Classification"], langs=["en"])
            },
            {
                "language": "Chinese",
                "description": "**Classification Chinese Leaderboard** 🧡🇨🇳",
                "credits": chinese_credits,
                "data": DATA_CLASSIFICATION_ZH,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_ZH)
            },
            {
                "language": "Danish",
                "description": "**Classification Danish Leaderboard** 🤍🇩🇰",
                "credits": danish_credits,
                "data": DATA_CLASSIFICATION_DA,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA)
            },
            {
                "language": "French",
                "description": "**Classification French Leaderboard** 💙🇫🇷",
                "credits": french_credits,
                "data": DATA_CLASSIFICATION_FR,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_FR)
            },
            {
                "language": "Norwegian",
                "language_long": "Norwegian Bokmål",
                "description": "**Classification Norwegian Leaderboard** 💙🇳🇴",
                "credits": norwegian_credits,
                "data": DATA_CLASSIFICATION_NB,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_NB)
            },
            {
                "language": "Polish",
                "description": "**Classification Polish Leaderboard** 🤍🇵🇱",
                "credits": polish_credits,
                "data": DATA_CLASSIFICATION_PL,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_PL)
            },
            {
                "language": "Swedish",
                "description": "**Classification Swedish Leaderboard** 💛🇸🇪",
                "credits": norwegian_credits,
                "data": DATA_CLASSIFICATION_SV,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_SV)
            },
            {
                "language": "Other",
                "language_long": "47 (Only languages not included in the other tabs)",
                "description": "**Classification Other Languages Leaderboard** 💜💚💙",
                "data": DATA_CLASSIFICATION_OTHER,
                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_OTHER)
            }
        ]
    },
    "Clustering": {
        "metric": "Validity Measure (v_measure)",
        "data": [
            {
                "language": "English",
                "description": "**Clustering Leaderboard** ✨",
                "data": DATA_CLUSTERING,
                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING)
            },
            {
                "language": "Chinese",
                "description": "**Clustering Chinese Leaderboard** ✨🇨🇳",
                "credits": chinese_credits,
                "data": DATA_CLUSTERING_ZH,
                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH)
            },
            {
                "language": "French",
                "description": "**Clustering French Leaderboard** ✨🇫🇷",
                "credits": french_credits,
                "data": DATA_CLUSTERING_FR,
                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_FR)
            },
            {
                "language": "German",
                "description": "**Clustering German Leaderboard** ✨🇩🇪",
                "credits": "[Silvan](https://github.com/slvnwhrl)",
                "data": DATA_CLUSTERING_DE,
                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_DE)
            },
            {
                "language": "Polish",
                "description": "**Clustering Polish Leaderboard** ✨🇵🇱",
                "credits": polish_credits,
                "data": DATA_CLUSTERING_PL,
                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_PL)
            },
        ]
    },
    "Pair Classification": {
        "metric": "Average Precision based on Cosine Similarities (cos_sim_ap)",
        "data": [
            {
                "language": "English",
                "description": "**Pair Classification English Leaderboard** 🎭",
                "data": DATA_PAIR_CLASSIFICATION,
                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION)
            },
            {
                "language": "Chinese",
                "description": "**Pair Classification Chinese Leaderboard** 🎭🇨🇳",
                "credits": chinese_credits,
                "data": DATA_PAIR_CLASSIFICATION_ZH,
                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH)
            },
            {
                "language": "French",
                "description": "**Pair Classification French Leaderboard** 🎭🇫🇷",
                "credits": french_credits,
                "data": DATA_PAIR_CLASSIFICATION_FR,
                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_FR)
            },
            {
                "language": "Polish",
                "description": "**Pair Classification Polish Leaderboard** 🎭🇵🇱",
                "credits": polish_credits,
                "data": DATA_PAIR_CLASSIFICATION_PL,
                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_PL)
            },
        ]
    },
    "Reranking": {
        "metric": "Mean Average Precision (MAP)",
        "data": [
            {
                "language": "English",
                "description": "**Reranking English Leaderboard** 🥈",
                "data": DATA_RERANKING,
                "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING)
            },
            {
                "language": "Chinese",
                "description": "**Reranking Chinese Leaderboard** 🥈🇨🇳",
                "credits": chinese_credits,
                "data": DATA_RERANKING_ZH,
                "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH)
            },
            {
                "language": "French",
                "description": "**Reranking French Leaderboard** 🥈🇫🇷",
                "credits": french_credits,
                "data": DATA_RERANKING_FR,
                "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_FR)
            }
        ]
    },
    "Retrieval": {
        "metric": "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)",
        "data": [
            {
                "language": "English",
                "description": "**Retrieval English Leaderboard** 🔎",
                "data": DATA_RETRIEVAL,
                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL)
            },
            {
                "language": "Chinese",
                "description": "**Retrieval Chinese Leaderboard** 🔎🇨🇳",
                "credits": chinese_credits,
                "data": DATA_RETRIEVAL_ZH,
                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH)
            },
            {
                "language": "French",
                "description": "**Retrieval French Leaderboard** 🔎🇫🇷",
                "credits": french_credits,
                "data": DATA_RETRIEVAL_FR,
                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR)
            },
            {
                "language": "Law",
                "language_long": "English, German, Chinese",
                "description": "**Retrieval Law Leaderboard** 🔎⚖️",
                "credits": "[Voyage AI](https://www.voyageai.com/)",
                "data": DATA_RETRIEVAL_LAW,
                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_LAW)
            },
            {
                "language": "Polish",
                "description": "**Retrieval Polish Leaderboard** 🔎🇵🇱",
                "credits": polish_credits,
                "data": DATA_RETRIEVAL_PL,
                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_PL)
            }
        ]
    },
    "STS": {
        "metric": "Spearman correlation based on cosine similarity",
        "data": [
            {
                "language": "English",
                "description": "**STS English Leaderboard** 🤖",
                "data": DATA_STS_EN,
                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS)
            },
            {
                "language": "Chinese",
                "description": "**STS Chinese Leaderboard** 🤖🇨🇳",
                "credits": chinese_credits,
                "data": DATA_STS_ZH,
                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH)
            },
            {
                "language": "French",
                "description": "**STS French Leaderboard** 🤖🇫🇷",
                "credits": french_credits,
                "data": DATA_STS_FR,
                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_FR)
            },
            {
                "language": "Polish",
                "description": "**STS Polish Leaderboard** 🤖🇵🇱",
                "credits": polish_credits,
                "data": DATA_STS_PL,
                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_PL)
            },
            {
                "language": "Other",
                "language_long": "Arabic, Chinese, Dutch, English, French, German, Italian, Korean, Polish, Russian, Spanish (Only language combos not included in the other tabs)",
                "description": "**STS Other Leaderboard** 👽",
                "data": DATA_STS_OTHER,
                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_OTHER)
            },
        ]
    },
    "Summarization": {
        "metric": "Spearman correlation based on cosine similarity",
        "data": [
            {
                "language": "English",
                "description": "**Summarization Leaderboard** 📜",
                "data": DATA_SUMMARIZATION,
                # Bug fix: the dataset list was previously passed as tasks=,
                # unlike every other refresh which passes the task category
                # plus datasets=.
                "refresh": partial(get_mteb_data, tasks=["Summarization"], datasets=TASK_LIST_SUMMARIZATION)
            },
            {
                "language": "French",
                # Fixed: previously duplicated the English tab title; all other
                # French tabs carry "French" and the flag.
                "description": "**Summarization French Leaderboard** 📜🇫🇷",
                "credits": french_credits,
                "data": DATA_SUMMARIZATION_FR,
                # Same tasks=/datasets= fix as the English tab above.
                "refresh": partial(get_mteb_data, tasks=["Summarization"], datasets=TASK_LIST_SUMMARIZATION_FR)
            }
        ]
    }
}
-for task in TASKS:
- data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
-
-for board, board_config in BOARDS_CONFIG.items():
- init_name = board_config["title"]
- if init_name in PRETTY_NAMES:
- init_name = PRETTY_NAMES[init_name]
- board_pretty_name = f"{init_name} leaderboard"
- acronym = board_config.get("acronym", None)
- board_icon = board_config.get("icon", None)
- if board_icon is None:
- board_icon = ""
- credits = board_config.get("credits", None)
- metric = board_config.get("metric", None)
- desc = board_config.get("desc", None)
-
- if board_config["has_overall"]:
- overall_pretty_name = board_pretty_name
- if acronym is not None:
- overall_pretty_name += f" ({board_config['acronym']})"
- data["Overall"]["data"].append({
- "language": board_config["title"],
- "language_long": board_config["language_long"],
- "description": f"**Overall MTEB {overall_pretty_name}** 🔮{board_icon}",
- "data": boards_data[board]["data_overall"],
- # "refresh": get_refresh_overall_function(board_config["tasks"]),
- "credits": credits,
- "metric": metric,
- "desc": desc,
- })
- for task_category, task_category_list in board_config["tasks"].items():
- task_icon = TASKS_CONFIG[task_category]['icon']
- if "special_icons" in board_config and isinstance(board_config["special_icons"], dict):
- task_icon = board_config["special_icons"].get(task_category, task_icon)
- data[task_category]["data"].append({
- "language": board_config["title"],
- "language_long": board_config["language_long"],
- "description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}",
- "data": boards_data[board]["data_tasks"][task_category],
- # "refresh": get_refresh_function(task_category, task_category_list),
- "credits": credits,
- "metric": metric,
- "desc": desc,
- })
# Accumulators for the dataframe components created while building the tab UI
# below; full_dataframes presumably holds the unfiltered copies consumed by
# filter_data (NOTE(review): confirm against the tab-building loop).
dataframes = []
full_dataframes = []
@@ -189,11 +2036,7 @@ function(goalUrlObject) {
def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
    """Record the newly selected task tab (and its language) for URL syncing.

    Returns the updated (current_task_language, language_per_task) pair so the
    gradio event chain can push the state back into the JSON components.
    """
    current_task_language["task"] = event.target.id
    # Either use the cached language for this task or the 1st language
    # (the id of the first child tab under the selected task tab).
    current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[0].children[0].id)
    return current_task_language, language_per_task
def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
@@ -215,10 +2058,6 @@ MODEL_TYPES = [
"Open",
"Proprietary",
"Sentence Transformers",
- "Cross-Encoders",
- "Bi-Encoders",
- "Uses Instructions",
- "No Instructions",
]
def filter_data(search_query, model_types, model_sizes, *full_dataframes):
@@ -229,7 +2068,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
names = df["Model"].map(lambda x: re.match("(.+)", x).group(1))
masks = []
for query in search_query.split(";"):
- masks.append(names.str.lower().str.contains(query.lower()))
+ masks.append(names.str.contains(query))
df = df[reduce(lambda a, b: a | b, masks)]
# Apply the model type filtering
@@ -242,14 +2081,6 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
elif model_type == "Sentence Transformers":
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
- elif model_type == "Cross-Encoders":
- masks.append(df["Model"].isin(CROSS_ENCODERS))
- elif model_type == "Bi-Encoders":
- masks.append(df["Model"].isin(BI_ENCODERS))
- elif model_type == "Uses Instructions":
- masks.append(df["Model"].isin(INSTRUCT_MODELS))
- elif model_type == "No Instructions":
- masks.append(df["Model"].isin(NOINSTRUCT_MODELS))
if masks:
df = df[reduce(lambda a, b: a | b, masks)]
else:
@@ -261,10 +2092,10 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
sizes = df["Model Size (Million Parameters)"].replace('', 0)
mask = sizes.apply(lambda size: any(numeric_interval.contains(size)))
df = df[mask]
+
output_dataframes.append(df)
return output_dataframes
-
with gr.Blocks(css=css) as block:
# Store the current task and language for updating the URL. This is a bit hacky, but it works
@@ -273,7 +2104,7 @@ with gr.Blocks(css=css) as block:
language_per_task = gr.JSON(value=dict(), visible=False)
gr.Markdown(f"""
- Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the MTEB GitHub repository 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models. Also check out [MTEB Arena](https://huggingface.co/spaces/mteb/arena) ⚔️
+ Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the MTEB GitHub repository 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
""")
with gr.Row():
@@ -286,8 +2117,7 @@ with gr.Blocks(css=css) as block:
choices=MODEL_TYPES,
value=MODEL_TYPES,
interactive=True,
- elem_classes=["filter-checkbox-group"],
- scale=3,
+ elem_classes=["filter-checkbox-group"]
)
filter_model_sizes = gr.CheckboxGroup(
label="Model sizes (in number of parameters)",
@@ -301,17 +2131,16 @@ with gr.Blocks(css=css) as block:
with gr.Tabs() as outer_tabs:
# Store the tabs for updating them on load based on URL parameters
tabs.append(outer_tabs)
+
for task, task_values in data.items():
metric = task_values["metric"]
task_tab_id = task.lower().replace(" ", "-")
# Overall, Bitext Mining, Classification, etc.
- pretty_task_name = task if task not in PRETTY_NAMES.keys() else PRETTY_NAMES[task]
- with gr.Tab(pretty_task_name, id=task_tab_id) as task_tab:
+ with gr.Tab(task, id=task_tab_id) as task_tab:
# For updating the 'task' in the URL
task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
- if "Overall" != task:
- gr.Markdown(TASK_DESCRIPTIONS[task])
+
with gr.Tabs() as task_tabs:
# Store the task tabs for updating them on load based on URL parameters
tabs.append(task_tabs)
@@ -324,18 +2153,13 @@ with gr.Blocks(css=css) as block:
# For updating the 'language' in the URL
item_tab.select(update_url_language, [current_task_language, language_per_task], [current_task_language, language_per_task], trigger_mode="always_last").then(None, [current_task_language], [], js=set_window_url_params)
- specific_metric = metric
- if item.get("metric", None) is not None:
- specific_metric = item['metric']
-
with gr.Row():
gr.Markdown(f"""
{item['description']}
- - **Metric:** {specific_metric}
+ - **Metric:** {metric}
- **Languages:** {item['language_long'] if 'language_long' in item else item['language']}
- {"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''}
- {"- **Description:** " + item['desc'] if ("desc" in item and item["desc"] is not None) else ''}
+ {"- **Credits:** " + item['credits'] if "credits" in item else ''}
""")
with gr.Row():
@@ -346,9 +2170,9 @@ with gr.Blocks(css=css) as block:
full_dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", visible=False)
full_dataframes.append(full_dataframe)
- # with gr.Row():
- # refresh_button = gr.Button("Refresh")
- # refresh_button.click(item["refresh"], inputs=None, outputs=dataframe, concurrency_limit=20)
+ with gr.Row():
+ refresh_button = gr.Button("Refresh")
+ refresh_button.click(item["refresh"], inputs=None, outputs=dataframe)
gr.Markdown(f"""
- **Total Datasets**: {NUM_DATASETS}
@@ -399,14 +2223,6 @@ with gr.Blocks(css=css) as block:
block.queue(max_size=10)
block.launch()
-# Add model names here so the mteb/leaderboard space shows up on their model page
-# from envs import MODEL_META
-# print("','".join(MODEL_META["models_to_skip"]))
-# print("','".join(list(MODEL_META['model_meta'].keys())))
-# print("','".join([x['link'].split("co/")[-1] for x in MODEL_META['model_meta'].values() if (x.get('link', None)) and ("huggingface.co" in x['link'])]))
-# from envs import API; print("','".join([x.modelId for x in list(API.list_models(filter="mteb")) if x.modelId not in UNUSED]))
-UNUSED = ['michaelfeil/ct2fast-e5-large-v2','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse','newsrx/instructor-xl','sionic-ai/sionic-ai-v1','lsf1000/bge-evaluation','Intel/bge-small-en-v1.5-sst2','newsrx/instructor-xl-newsrx','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse','davidpeer/gte-small','goldenrooster/multilingual-e5-large','kozistr/fused-large-en','mixamrepijey/instructor-small','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised','DecisionOptimizationSystem/DeepFeatEmbeddingLargeContext','Intel/bge-base-en-v1.5-sst2-int8-dynamic','morgendigital/multilingual-e5-large-quantized','BAAI/bge-small-en','ggrn/e5-small-v2','vectoriseai/gte-small','giulio98/placeholder','odunola/UAE-Large-VI','vectoriseai/e5-large-v2','gruber/e5-small-v2-ggml','Severian/nomic','arcdev/e5-mistral-7b-instruct','mlx-community/multilingual-e5-base-mlx','michaelfeil/ct2fast-bge-base-en-v1.5','Intel/bge-small-en-v1.5-sst2-int8-static','jncraton/stella-base-en-v2-ct2-int8','vectoriseai/multilingual-e5-large','rlsChapters/Chapters-SFR-Embedding-Mistral','arcdev/SFR-Embedding-Mistral','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised','vectoriseai/gte-base','mixamrepijey/instructor-models','GovCompete/e5-large-v2','ef-zulla/e5-multi-sml-torch','khoa-klaytn/bge-small-en-v1.5-angle','krilecy/e5-mistral-7b-instruct','vectoriseai/bge-base-en-v1.5','vectoriseai/instructor-base','jingyeom/korean_embedding_model','rizki/bgr-tf','barisaydin/bge-base-en','jamesgpt1/zzz','Malmuk1/e5-large-v2_Sharded','vectoriseai/ember-v1','Consensus/instructor-base','barisaydin/bge-small-en','barisaydin/gte-base','woody72/multilingual-e5-base','Einas/einas_ashkar','michaelfeil/ct2fast-bge-large-en-v1.5','vectoriseai/bge-small-en-v1.5','iampanda/Test','cherubhao/yogamodel','ieasybooks/multilingual-e5-large-onnx','jncraton/e5-small-v2-ct2-int8','radames/e5-large'
,'khoa-klaytn/bge-base-en-v1.5-angle','Intel/bge-base-en-v1.5-sst2-int8-static','vectoriseai/e5-large','TitanML/jina-v2-base-en-embed','Koat/gte-tiny','binqiangliu/EmbeddingModlebgelargeENv1.5','beademiguelperez/sentence-transformers-multilingual-e5-small','sionic-ai/sionic-ai-v2','jamesdborin/jina-v2-base-en-embed','maiyad/multilingual-e5-small','dmlls/all-mpnet-base-v2','odunola/e5-base-v2','vectoriseai/bge-large-en-v1.5','vectoriseai/bge-small-en','karrar-alwaili/UAE-Large-V1','t12e/instructor-base','Frazic/udever-bloom-3b-sentence','Geolumina/instructor-xl','hsikchi/dump','recipe/embeddings','michaelfeil/ct2fast-bge-small-en-v1.5','ildodeltaRule/multilingual-e5-large','shubham-bgi/UAE-Large','BAAI/bge-large-en','michaelfeil/ct2fast-e5-small-v2','cgldo/semanticClone','barisaydin/gte-small','aident-ai/bge-base-en-onnx','jamesgpt1/english-large-v1','michaelfeil/ct2fast-e5-small','baseplate/instructor-large-1','newsrx/instructor-large','Narsil/bge-base-en','michaelfeil/ct2fast-e5-large','mlx-community/multilingual-e5-small-mlx','lightbird-ai/nomic','MaziyarPanahi/GritLM-8x7B-GGUF','newsrx/instructor-large-newsrx','dhairya0907/thenlper-get-large','barisaydin/bge-large-en','jncraton/bge-small-en-ct2-int8','retrainai/instructor-xl','BAAI/bge-base-en','gentlebowl/instructor-large-safetensors','d0rj/e5-large-en-ru','atian-chapters/Chapters-SFR-Embedding-Mistral','Intel/bge-base-en-v1.5-sts-int8-static','Intel/bge-base-en-v1.5-sts-int8-dynamic','jncraton/GIST-small-Embedding-v0-ct2-int8','jncraton/gte-tiny-ct2-int8','d0rj/e5-small-en-ru','vectoriseai/e5-small-v2','SmartComponents/bge-micro-v2','michaelfeil/ct2fast-gte-base','vectoriseai/e5-base-v2','Intel/bge-base-en-v1.5-sst2','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised','Research2NLP/electrical_stella','weakit-v/bge-base-en-v1.5-onnx','GovCompete/instructor-xl','barisaydin/text2vec-base-multilingual','Intel/bge-small-en-v1.5-sst2-int8-dynamic','jncraton/gte-small-ct2-int8','d0rj/e5-base-en-ru','barisaydin/gte-larg
e','fresha/e5-large-v2-endpoint','vectoriseai/instructor-large','Severian/embed','vectoriseai/e5-base','mlx-community/multilingual-e5-large-mlx','vectoriseai/gte-large','anttip/ct2fast-e5-small-v2-hfie','michaelfeil/ct2fast-gte-large','gizmo-ai/Cohere-embed-multilingual-v3.0','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse','Kenknight1999/tungdd7_ft_e5','joteqwork/new_gsev0','vantagediscovery/jina-embeddings-v2-base-en','vantagediscovery/nomic-embed-text-v1','vantagediscovery/nomic-embed-text-v1.5','srikanthmalla/hkunlp-instructor-xl','afrideva/GIST-all-MiniLM-L6-v2-GGUF','nadeem1362/mxbai-embed-large-v1-Q4_K_M-GGUF','agier9/gte-Qwen1.5-7B-instruct-Q5_K_M-GGUF','ekorman-strive/bge-large-en-v1.5','raghavlight/SE_v1','liddlefish/privacyembeddingv2_bge_small','ahmet1338/finetuned_embedder','radia/snowflake-arctic-embed-l-Q4_K_M-GGUF','GregorBiswanger/GritLM-7B-Q4_K_M-GGUF','powermove72/GritLM-7B-Q4_K_M-GGUF','sunzx0810/gte-Qwen2-7B-instruct-Q5_K_M-GGUF','nazimali/gte-Qwen2-7B-instruct-Q6_K-GGUF','nazimali/gte-Qwen2-7B-instruct-Q6_K-GGUF','fishbone64/gte-Qwen2-7B-instruct-Q8_0-GGUF','tobchef/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF','liddlefish/privacy_embedding_rag','liddlefish/privacy_embedding_rag_10k_tmp','liddlefish/privacy_embedding_bge_small_synthetic','mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF','leonn71/gte-Qwen2-1.5B-instruct-Q6_K-GGUF', 
'Baichuan-text-embedding','Cohere-embed-english-v3.0','Cohere-embed-multilingual-light-v3.0','Cohere-embed-multilingual-v3.0','DanskBERT','FollowIR-7B','GritLM-7B','LASER2','LLM2Vec-Llama-2-supervised','LLM2Vec-Llama-2-unsupervised','LLM2Vec-Meta-Llama-3-supervised','LLM2Vec-Meta-Llama-3-unsupervised','LLM2Vec-Mistral-supervised','LLM2Vec-Mistral-unsupervised','LLM2Vec-Sheared-Llama-supervised','LLM2Vec-Sheared-Llama-unsupervised','LaBSE','OpenSearch-text-hybrid','SFR-Embedding-Mistral','all-MiniLM-L12-v2','all-MiniLM-L6-v2','all-mpnet-base-v2','allenai-specter','bert-base-10lang-cased','bert-base-15lang-cased','bert-base-25lang-cased','bert-base-multilingual-cased','bert-base-multilingual-uncased','bert-base-swedish-cased','bert-base-uncased','bge-base-zh-v1.5','bge-large-en-v1.5','bge-large-zh-noinstruct','bge-large-zh-v1.5','bge-m3','bge-small-zh-v1.5','bm25','camembert-base','camembert-large','contriever-base-msmarco','cross-en-de-roberta-sentence-transformer','dfm-encoder-large-v1','dfm-sentence-encoder-large-1','distilbert-base-25lang-cased','distilbert-base-en-fr-cased','distilbert-base-en-fr-es-pt-it-cased','distilbert-base-fr-cased','distilbert-base-uncased','distiluse-base-multilingual-cased-v2','e5-base-4k','e5-base-v2','e5-base','e5-large-v2','e5-large','e5-mistral-7b-instruct','e5-small','electra-small-nordic','electra-small-swedish-cased-discriminator','elser-v2','flan-t5-base','flan-t5-large','flaubert_base_cased','flaubert_base_uncased','flaubert_large_cased','gbert-base','gbert-large','gelectra-base','gelectra-large','glove.6B.300d','google-gecko-256.text-embedding-004','google-gecko.text-embedding-004','gottbert-base','gte-Qwen1.5-7B-instruct','gte-Qwen2-7B-instruct','gtr-t5-base','gtr-t5-large','gtr-t5-xl','gtr-t5-xxl','herbert-base-retrieval-v2','instructor-base','instructor-large','instructor-xl','jina-embeddings-v2-base-en','komninos','llama-2-7b-chat','luotuo-bert-medium','m3e-base','m3e-large','mistral-7b-instruct-v0.2','mistral-embed','monob
ert-large-msmarco','monot5-3b-msmarco-10k','monot5-base-msmarco-10k','msmarco-bert-co-condensor','multi-qa-MiniLM-L6-cos-v1','multilingual-e5-base','multilingual-e5-large','multilingual-e5-small','nb-bert-base','nb-bert-large','nomic-embed-text-v1','nomic-embed-text-v1.5-128','nomic-embed-text-v1.5-256','nomic-embed-text-v1.5-512','nomic-embed-text-v1.5-64','norbert3-base','norbert3-large','paraphrase-multilingual-MiniLM-L12-v2','paraphrase-multilingual-mpnet-base-v2','sentence-bert-swedish-cased','sentence-camembert-base','sentence-camembert-large','sentence-croissant-llm-base','sentence-t5-base','sentence-t5-large','sentence-t5-xl','sentence-t5-xxl','silver-retriever-base-v1','st-polish-paraphrase-from-distilroberta','st-polish-paraphrase-from-mpnet','sup-simcse-bert-base-uncased','text-embedding-3-large','text-embedding-3-large-256','text-embedding-3-small','text-embedding-ada-002','text-search-ada-001','text-search-ada-doc-001','text-search-ada-query-001','text-search-babbage-001','text-search-curie-001','text-search-davinci-001','text-similarity-ada-001','text-similarity-babbage-001','text-similarity-curie-001','text-similarity-davinci-001','tart-dual-contriever-msmarco','tart-full-flan-t5-xl','text2vec-base-chinese','text2vec-base-multilingual','text2vec-large-chinese','titan-embed-text-v1','udever-bloom-1b1','udever-bloom-560m','universal-sentence-encoder-multilingual-3','universal-sentence-encoder-multilingual-large-3','unsup-simcse-bert-base-uncased','use-cmlm-multilingual','voyage-2','voyage-code-2','voyage-large-2-instruct','voyage-law-2','voyage-lite-01-instruct','voyage-lite-02-instruct','voyage-multilingual-2','xlm-roberta-base','xlm-roberta-large','NV-Retriever-v1','NV-Embed-v1','Linq-Embed-Mistral','Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit','Muennighoff/SGPT-125M-weightedmean-msmarco-specb-bitfit','Muennighoff/SGPT-125M-weightedmean-nli-bitfit','Muennighoff/SGPT-2.7B-weightedmean-msmarco-specb-bitfit','Muennighoff/SGPT-5.8B-weightedmea
n-msmarco-specb-bitfit','Muennighoff/SGPT-5.8B-weightedmean-nli-bitfit','DMetaSoul/sbert-chinese-general-v1','bigscience-data/sgpt-bloom-1b7-nli','bigscience/sgpt-bloom-7b1-msmarco','aari1995/German_Semantic_STS_V2','intfloat/e5-small','hkunlp/instructor-large','hkunlp/instructor-base','hkunlp/instructor-xl','intfloat/e5-base','intfloat/e5-large','Shimin/yiyouliao','vprelovac/universal-sentence-encoder-multilingual-large-3','vprelovac/universal-sentence-encoder-multilingual-3','vprelovac/universal-sentence-encoder-4','vprelovac/universal-sentence-encoder-large-5','ManiShankar-AlpesAi/paraphrase-multilingual-mpnet-base-v2-KE_Sieve','nickprock/mmarco-bert-base-italian-uncased','intfloat/e5-small-v2','intfloat/e5-base-v2','intfloat/e5-large-v2','intfloat/multilingual-e5-base','Shimin/LLaMA-embeeding','Forbu14/openai_clip_embeddings','shibing624/text2vec-base-multilingual','consciousAI/cai-lunaris-text-embeddings','consciousAI/cai-stellaris-text-embeddings','intfloat/multilingual-e5-small','intfloat/multilingual-e5-large','jinaai/jina-embedding-s-en-v1','jinaai/jina-embedding-b-en-v1','jinaai/jina-embedding-l-en-v1','deepfile/embedder-100p','lixsh6/XLM-3B5-embedding','lixsh6/XLM-0B6-embedding','thenlper/gte-base','thenlper/gte-large','thenlper/gte-small','lixsh6/MegatronBert-1B3-embedding','facebook/SONAR','Hum-Works/lodestone-base-4096-v1','sensenova/piccolo-base-zh','sensenova/piccolo-large-zh','infgrad/stella-base-zh','infgrad/stella-large-zh','BAAI/bge-reranker-base','BAAI/bge-base-en-v1.5','BAAI/bge-large-en-v1.5','BAAI/bge-small-en-v1.5','BAAI/bge-reranker-large','mgoin/all-MiniLM-L6-v2-ds','neuralmagic/bge-small-en-v1.5-sparse','jinaai/jina-embeddings-v2-base-en','jinaai/jina-embeddings-v2-small-en','neuralmagic/bge-small-en-v1.5-quant','nickprock/stsbm-sentence-flare-it','nickprock/mmarco-sentence-flare-it','neuralmagic/bge-base-en-v1.5-sparse','neuralmagic/bge-base-en-v1.5-quant','neuralmagic/bge-large-en-v1.5-sparse','neuralmagic/bge-large-en-v1.5-quant','Tayl
orAI/gte-tiny','TaylorAI/bge-micro','llmrails/ember-v1','TaylorAI/bge-micro-v2','zeroshot/gte-small-quant','infgrad/stella-large-zh-v2','infgrad/stella-base-zh-v2','zeroshot/gte-large-quant','zeroshot/gte-large-sparse','EdwardBurgin/paraphrase-multilingual-mpnet-base-v2','Amu/tao','infgrad/stella-base-en-v2','djovak/multi-qa-MiniLM-L6-cos-v1','izhx/udever-bloom-560m','izhx/udever-bloom-1b1','izhx/udever-bloom-3b','izhx/udever-bloom-7b1','thtang/ALL_862873','andersonbcdefg/bge-small-4096','Cohere/Cohere-embed-multilingual-light-v3.0','Cohere/Cohere-embed-multilingual-v3.0','Cohere/Cohere-embed-english-light-v3.0','Cohere/Cohere-embed-english-v3.0','Amu/tao-8k','thenlper/gte-large-zh','thenlper/gte-base-zh','thenlper/gte-small-zh','jamesgpt1/sf_model_e5','OrlikB/st-polish-kartonberta-base-alpha-v1','TownsWu/PEG','sdadas/mmlw-e5-small','sdadas/mmlw-e5-base','sdadas/mmlw-e5-large','sdadas/mmlw-roberta-base','sdadas/mmlw-roberta-large','jinaai/jina-embeddings-v2-base-code','aws-neuron/bge-base-en-v1-5-seqlen-384-bs-1','Erin/mist-zh','ClayAtlas/winberta-base','Pristinenlp/alime-reranker-large-zh','WhereIsAI/UAE-Large-V1','OrdalieTech/Solon-embeddings-large-0.1','ClayAtlas/winberta-large','intfloat/e5-mistral-7b-instruct','liujiarik/lim_base_zh','RookieHX/bge_m3e_stella','akarum/cloudy-large-zh','zhou-xl/bi-cse','lier007/xiaobu-embedding','jinaai/jina-embeddings-v2-base-zh','jinaai/jina-embeddings-v2-base-de','nomic-ai/nomic-embed-text-v1-ablated','nomic-ai/nomic-embed-text-v1-unsupervised','mukaj/fin-mpnet-base','Pristinenlp/alime-embedding-large-zh','pascalhuerten/instructor-skillfit','jinaai/jina-embeddings-v2-base-es','Salesforce/SFR-Embedding-Mistral','DMetaSoul/Dmeta-embedding-zh','Xenova/jina-embeddings-v2-base-zh','Xenova/jina-embeddings-v2-base-de','avsolatorio/GIST-Embedding-v0','nomic-ai/nomic-embed-text-v1','avsolatorio/GIST-all-MiniLM-L6-v2','avsolatorio/GIST-small-Embedding-v0','biswa921/bge-m3','Jechto/e5-dansk-test-0.1','intfloat/multilingual-e5-large-instr
uct','tanmaylaud/ret-phi2-v0','nomic-ai/nomic-embed-text-v1.5','GritLM/GritLM-7B','GritLM/GritLM-8x7B','avsolatorio/GIST-large-Embedding-v0','ClayAtlas/windberta-large','infgrad/stella-base-zh-v3-1792d','dunzhang/stella-large-zh-v3-1792d','jspringer/echo-mistral-7b-instruct-lasttoken','dunzhang/stella-mrl-large-zh-v3.5-1792d','sentosa/ZNV-Embedding','Nehc/e5-large-ru','neofung/m3e-ernie-xbase-zh','mixedbread-ai/mxbai-embed-2d-large-v1','mixedbread-ai/mxbai-embed-large-v1','aspire/acge_text_embedding','manu/sentence_croissant_alpha_v0.1','wongctroman/hktv-fine-tuned-cloudy-large-zh-metaphor14','manu/sentence_croissant_alpha_v0.2','mradermacher/GritLM-8x7B-GGUF','jhu-clsp/FollowIR-7B','DMetaSoul/Dmeta-embedding-zh-small','dwzhu/e5-base-4k','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp','ChristianAzinn/uae-large-v1-gguf','ChristianAzinn/gist-large-embedding-v0-gguf','ChristianAzinn/bge-base-en-v1.5-gguf','ChristianAzinn/bge-small-en-v1.5-gguf','ChristianAzinn/bge-large-en-v1.5-gguf','ChristianAzinn/gte-base-gguf','ChristianAzinn/gte-large-gguf','ChristianAzinn/gte-small-gguf','ChristianAzinn/mxbai-embed-large-v1-gguf','ChristianAzinn/gist-small-embedding-v0-gguf','ChristianAzinn/e5-base-v2-gguf','ChristianAzinn/e5-large-v2-gguf','ChristianAzinn/e5-small-v2-gguf','ChristianAzinn/labse-gguf','srikanthmalla/BAAI-bge-reranker-large','Snowflake/snowflake-arctic-embed-m','manu/bge-m3-custom-fr','Snowflake/snowflake-arctic-embed-m-long','Snowflake/snowflake-arctic-embed-s','Snowflake/snowflake-arctic-embed-xs','Snowflake/snowflake-arctic-embed-l','ChristianAzinn/snowflake-arctic-embed-l-gguf','ChristianAzinn/snowflake-arctic-embed-m-long-GGUF','ChristianAzinn/snowflake-arctic-embed-m-gguf','ChristianAzinn/snowflake-arctic-embed-s-gguf','ChristianAzinn/snowflake-arctic-embed-xs-gguf','dwzhu/e5rope-base','pengql/checkpoint-9000','Alibaba-NLP/gte-base-en-v1.5','Alibaba-NLP/gte-large-en-v1.5','
Alibaba-NLP/gte-Qwen1.5-7B-instruct','sensenova/piccolo-large-zh-v2','Mihaiii/gte-micro','NLPArtisan/qwen-1.8b-retrieval-test','Mihaiii/gte-micro-v2','Mihaiii/gte-micro-v3','Mihaiii/gte-micro-v4','Mihaiii/Taximetristi-2023','manu/sentence_croissant_alpha_v0.3','Mihaiii/Bulbasaur','Mihaiii/Ivysaur','manu/sentence_croissant_alpha_v0.4','Mihaiii/Venusaur','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp','amazon/Titan-text-embeddings-v2','Mihaiii/Squirtle','Mihaiii/Wartortle','avsolatorio/NoInstruct-small-Embedding-v0','Mihaiii/test24','Mihaiii/test25','yessilver/new_model','fine-tuned/jina-embeddings-v2-base-en-03052024-r5ez-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-c20v-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-x8ew-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-73xx-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-21on-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-0swb-webapp','corto-ai/nomic-embed-text-v1','fine-tuned/jina-embeddings-v2-base-en-06052024-lmgf-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-6bdu-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-5pdj-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-yl1z-webapp','fine-tuned/jina-embeddings-v2-base-en-652024-vsmg-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-ruwi-webapp','fine-tuned/test','fine-tuned/jina-embeddings-v2-base-code-06052024-mhal-webapp','fine-tuned/jina-embeddings-v2-base-en-562024-j9xx-webapp','fine-tuned/jina-embeddings-v2-base-en-572024-xg53-webapp','fine-tuned/jina-embeddings-v2-base-en-202457-oc31-webapp','fine-tuned/scientific_papers_from_arxiv','fine-tuned/coding','fine-tuned/very_specific_technical_questions_about_Ubuntu','fine-tuned/CMedQAv2-reranking-improved','Labib11/MUG-B-1.6','shhy1995/AGE_Hybrid','fine-tuned/jina-embeddings-v2-base-en-10052024-lns6-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-scientific-papers-from-arxiv','fine-tuned/jinaai_jina-embeddings-v2-base-code-askubuntu','
fine-tuned/jinaai_jina-embeddings-v2-base-en-scidocs','fine-tuned/jinaai_jina-embeddings-v2-base-code-stackoverflow','fine-tuned/jina-embeddings-v2-base-en-5102024-kvgq-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-zh-CMedQAv2','fine-tuned/jina-embeddings-v2-base-code-11_05_2024-hbxc-webapp','fine-tuned/jina-embeddings-v2-base-en-5102024-h7o7-webapp','fine-tuned/CMedQAv2-3','michaelfeil/jina-embeddings-v2-base-code','fine-tuned/jina-embeddings-v2-base-en-2024512-wvj9-webapp','fine-tuned/jina-embeddings-v2-base-en-5122024-3toh-webapp','MoMonir/SFR-Embedding-Mistral-GGUF','technicolor/Angle_BERT','fine-tuned/jina-embeddings-v2-base-en-2024513-kkxa-webapp','fine-tuned/jina-embeddings-v2-base-en-13052024-35bv-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-code-jinaai_jina-embeddings-v2-base-cod','fine-tuned/jinaai_jina-embeddings-v2-base-en-jinaai_jina-embeddings-v2-base-en-sc','fine-tuned/jinaai_jina-embeddings-v2-base-zh-jinaai_jina-embeddings-v2-base-zh-CM','fine-tuned/jinaai_jina-embeddings-v2-base-zh-CMedQAv2-3','fine-tuned/scidocs','fine-tuned/askubuntu','fine-tuned/stackoverflow','fine-tuned/cmedqav2','fine-tuned/jina-embeddings-v2-base-en-13052024-ch9n-webapp','fine-tuned/askubuntu-c','fine-tuned/askubuntu-l','fine-tuned/scidocs-c','fine-tuned/stackoverflow-c','fine-tuned/cmedqav2-c','fine-tuned/norwegian-nli-triplets-c','AdrienB134/llm2vec-croissant-mntp','Erin/IYun-large-zh','fine-tuned/jina-embeddings-v2-base-en-14052024-5b5o-webapp','fine-tuned/jina-embeddings-v2-base-en-14052024-9xxb-webapp','fine-tuned/jina-embeddings-v2-base-en-14052024-afuz-webapp','fine-tuned/dutch-legal-c','AdrienB134/llm2vec-occiglot-mntp','fine-tuned/dutch-legal-c-64-24','w601sxs/b1ade-embed','fine-tuned/dutch-legal-c-1280-24','neofung/bge-reranker-large-1k','fine-tuned/askubuntu-c-128-24','fine-tuned/askubuntu-c-256-24','fine-tuned/stackoverflow-c-128-24','fine-tuned/cmedqav2-c-128-24','fine-tuned/scidocs-c-128-24','fine-tuned/dutch-legal-c-128-24','fine-tuned/scidocs-c-256
-24','fine-tuned/stackoverflow-c-256-24','qihoo360/360Zhinao-search','fine-tuned/stackoverflow-c-64-24','fine-tuned/askubuntu-c-64-24','fine-tuned/scidocs-c-64-24','fine-tuned/cmedqav2-c-64-24','fine-tuned/jina-embeddings-v2-base-en-15052024-stsl-webapp','fine-tuned/jina-embeddings-v2-base-en-5152024-tsbl-webapp','fine-tuned/jina-embeddings-v2-base-en-5162024-o9um-webapp','fine-tuned/test-run','fine-tuned/stackoverflow-c-64-24-gpt-4o-2024-05-13','MoMonir/gte-Qwen1.5-7B-instruct-GGUF','fine-tuned/scidocs-c-64-24-gpt-4o-2024-05-133652','fine-tuned/askubuntu-c-64-24-gpt-4o-2024-05-135760','fine-tuned/stackoverflow-c-64-24-gpt-4o-2024-05-137765','fine-tuned/scidocs-c-64-24-gpt-4o-2024-05-13-46337','fine-tuned/askubuntu-c-64-24-gpt-4o-2024-05-131171','fine-tuned/scidocs-c-64-24-gpt-4o-2024-05-135334','fine-tuned/askubuntu-c-64-24-gpt-4o-2024-05-13-61285','fine-tuned/cmedqav2-c-64-24-gpt-4o-2024-05-13-50353','fine-tuned/jina-embeddings-v2-base-en-1752024-13s3-webapp','fine-tuned/jina-embeddings-v2-base-en-1752024-zdtc-webapp','fine-tuned/jina-embeddings-v2-base-en-17052024-uhub-webapp','neofung/bge-reranker-base-1k','fine-tuned/jina-embeddings-v2-base-en-17052024-dumr-webapp','fine-tuned/arguana-c-64-24-gpt-4o-2024-05-136897','fine-tuned/arguana-c-64-24-gpt-4o-2024-05-136538','fine-tuned/arguana-c-128-24-gpt-4o-2024-05-13-68212','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-51550','fine-tuned/jina-embeddings-v2-base-en-19052024-oiu8-webapp','fine-tuned/jina-embeddings-v2-base-en-5192024-xqq9-webapp','fine-tuned/jina-embeddings-v2-base-en-5192024-qeye-webapp','fine-tuned/jina-embeddings-v2-base-en-5192024-seuc-webapp','qihoo360/360Zhinao-1.8B-Reranking','fine-tuned/jina-embeddings-v2-base-en-5202024-55bm-webapp','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-693632','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-819563','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-413214','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-129048','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-
13-550302','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-799305','fine-tuned/jina-embeddings-v2-base-en-5202024-6tkj-webapp','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-264015','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-994439','fine-tuned/jina-embeddings-v2-base-en-5202024-rxyq-webapp','jinaai/jina-clip-v1','fine-tuned/jina-embeddings-v2-base-en-21052024-5qm5-webapp','dayyass/universal-sentence-encoder-multilingual-large-3-pytorch','fine-tuned/jina-embeddings-v2-base-en-21052024-5smg-webapp','fine-tuned/jina-embeddings-v2-base-en-22052024-vuno-webapp','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-387094','fine-tuned/LegalBenchConsumerContractsQA-256-24-gpt-4o-2024-05-13-292605','fine-tuned/LegalBenchCorporateLobbying-256-24-gpt-4o-2024-05-13-296144','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-454852','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-79875','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-190413','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-727361','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-410031','fine-tuned/jina-embeddings-v2-base-code-5222024-i8af-webapp','fine-tuned/jina-embeddings-v2-base-en-5222024-hkde-webapp','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-14719','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-526066','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-10630','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-825318','nvidia/NV-Embed-v1','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-203779','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-497939','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-417900','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-994884','fine-tuned/jina-embeddings-v2-base-en-23052024-hbdj-webapp','fine-tuned/jina-embeddings-v2-base-en-23052024-6kfw-webapp','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-214114','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-587313','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-36954','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-814821','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-256742','f
ine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-317735','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-378237','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-992459','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-552473','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-816730','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-875153','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-630221','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-214478','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-645586','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-786584','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-785172','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-687872','fine-tuned/BAAI_bge-small-en-v1_5-23052024-upq5-webapp','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-855191','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-978964','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-847943','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-449863','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-610535','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-322852','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-898550','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-546049','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-499715','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-598568','fine-tuned/BAAI_bge-large-en-v1_5-5242024-5uvy-webapp','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-304829','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-138515','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-269096','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-778232','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-111876','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-292803','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-96776','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-67198','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-310581','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-449834','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-737659','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-976783','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-27685','fine-tuned/SCIDOCS-256-24-gpt-
4o-2024-05-13-54716','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-166315','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-812157','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-133486','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-423936','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-772252','w601sxs/b1ade-embed-kd','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-141246','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-478897','fine-tuned/ArguAna-256-24-gpt-4o-2024-05-13-952023','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-157892','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-780826','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-475598','fine-tuned/QuoraRetrieval-256-24-gpt-4o-2024-05-13-635320','fine-tuned/Touche2020-256-24-gpt-4o-2024-05-13-27907','fine-tuned/BAAI_bge-small-en-v1_5-5252024-jzfp-webapp','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-953989','fine-tuned/ArguAna-256-24-gpt-4o-2024-05-13-413991','fine-tuned/QuoraRetrieval-256-24-gpt-4o-2024-05-13-80208','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-484582','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-919917','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-988957','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-597314','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-896673','fine-tuned/ArguAna-256-24-gpt-4o-2024-05-13-689823','fine-tuned/BAAI_bge-small-en-v1_5-5272024-2fs4-webapp','fine-tuned/BAAI_bge-small-en-v1_5-27052024-4e8w-webapp','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-890333','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-140539','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-2499','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-733782','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-221689','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-465198','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-698531','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-548936','fine-tuned/BAAI_bge-small-en-v1_5-5272024-ou25-webapp','agier9/UAE-Large-V1-Q5_K_S-GGUF','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-69882','fine-tuned/ArguAn
a-512-192-gpt-4o-2024-05-13-822545','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-268697','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-43315','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-866232','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-580978','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-115380','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-985263','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-439294','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-607244','fine-tuned/TRECCOVID-512-192-gpt-4o-2024-05-13-347397','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-650620','fine-tuned/QuoraRetrieval-512-192-gpt-4o-2024-05-13-777321','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-73934','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-14571','fine-tuned/TRECCOVID-512-192-gpt-4o-2024-05-13-653452','fine-tuned/QuoraRetrieval-512-192-gpt-4o-2024-05-13-768442','fine-tuned/BAAI_bge-small-en-v1_5-5282024-hkt5-webapp','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-100928','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-906438','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-266507','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-93805','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-424608','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-710799','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-357185','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-873132','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-452456','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-143735','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-625238','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-186741','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-935443','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-418918','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-110174','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-859511','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-437825','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-986812','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-37395','fine-tu
ned/SCIDOCS-512-192-gpt-4o-2024-05-13-591725','fine-tuned/BAAI_bge-small-en-v1_5-2852024-6p16-webapp','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-93651135','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-89953157','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-23636059','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-83930416','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-27692546','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-76823162','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-89836585','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-28032241','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-34914559','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-10552781','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-44219785','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-60453771','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-34917964','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-24541174','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-20151707','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-26543668','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-3292683','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-14028623','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-378068','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-27258064','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-79168271','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-80780135','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-42468142','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-47583376','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-80745457','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-34699555','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-35912','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-6089388','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-43473113','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-31581583','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-79659206','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-51211577','fine-tuned/ArguAna-512-192-gpt-4o-2024-0
5-13-53785794','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-37851926','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-93507731','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-24464680','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-1134151','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-87401391','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-76679499','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-58211433','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-56351634','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-87403910','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-67485775','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-8421720','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-50444055','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-67948597','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-63275487','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-90390391','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-16241583','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-86331274','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-53403987','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-3465370','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-19100452','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-83904142','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-37125303','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-94762694','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-20768519','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-35609715','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-14003539','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-16083606','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-3973638','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-76839538','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-90164285','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-52015789','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-93248154','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-74504128','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-65608189','fine-tuned/SciFact-512-192-gpt-
4o-2024-05-13-92012085','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-34898812','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-59792256','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-26737110','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-41821758','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-29425597','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-12907987','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-34642434','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-65268203','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-85722278','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-7975202','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-62563104','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-22039677','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-80948573','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-48400660','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-10086588','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-11626257','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-5953538','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-68485784','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-51991531','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-81928581','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-6825910','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-52686172','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-63983441','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-76979764','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-25305323','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-89774081','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-99342737','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-97839788','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-52238558','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-486134','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-46607440','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-80802988','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-67820659','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-37230491','fine-tuned/FiQA20
18-512-192-gpt-4o-2024-05-13-64924747','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-17390035','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-66909812','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-67941497','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-95714065','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-65992666','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-89826544','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-74939490','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-16883408','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-40695234','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-68577224','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-47339454','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-36338558','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-17911388','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-97777963','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-51883844','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-24419258','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-34427772','Linq-AI-Research/Linq-Embed-Mistral','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-14562627','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-37833293','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-65274313','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-99421248','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-67596481','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-3038586','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-18360524','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-73143156','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-20584918','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-52831585','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-45622553','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-45587246','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-39088299','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-5483216','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-23538198','fine-tuned/NFC
orpus-32000-384-gpt-4o-2024-05-13-94858978','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-25926506','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-60385830','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-19472313','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-1216656','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-39265981','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-76083984','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-97946708','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-66633416','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-13220755','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-62034393','Classical/Yinka','fine-tuned/BAAI_bge-small-en-v1_5-30052024-rc2l-webapp','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-55034819','twadada/tst','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-2553188','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-28832324','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-50573159','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-38097330','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-66747460','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-48618256','fine-tuned/BAAI_bge-small-en-v1_5-612024-vf79-webapp','fine-tuned/BAAI_bge-small-en-v1_5-632024-34lw-webapp','corto-ai/bge-reranker-large-onnx','fine-tuned/BAAI_bge-small-en-v1_5-04062024-hsmq-webapp','iampanda/zpoint_large_embedding_zh','silverjam/jina-embeddings-v2-base-zh','fine-tuned/jinaai_jina-embeddings-v2-base-en-05062024-16gq-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-05062024-445b-webapp','neofung/LdIR-reranker-large','fine-tuned/jinaai_jina-embeddings-v2-base-en-05062024-zvoa-webapp','fine-tuned/BAAI_bge-small-en-v1_5-05062024-x987-webapp','fine-tuned/deepspeed-from-new-new-docker','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-86786922','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-59074949','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-55567015','fine-tuned/before-finetuning-32000-384-gpt-4o-2024
-05-13-67199932','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-24297328','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-81211802','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-7385160','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-74794049','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-42885533','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-27359624','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-35162543','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-33133286','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-83115388','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-41822019','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-66131574','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-68388407','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-71434542','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-6875032','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-91940173','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-70846146','fine-tuned/BAAI_bge-large-en-v1_5-672024-v51y-webapp','Gameselo/STS-multilingual-mpnet-base-v2','itod/UAE-Large-V1-Q8_0-GGUF','fine-tuned/jinaai_jina-embeddings-v2-base-en-08062024-z8ik-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-202469-tgjk-webapp','liddlefish/privacy_embedding_rag_10k_base_checkpoint_2','liddlefish/privacy_embedding_rag_10k_base_final','w601sxs/b1ade-embed-kd_3','fine-tuned/jinaai_jina-embeddings-v2-base-en-6112024-fmxr-webapp','liddlefish/privacy_embedding_rag_10k_base_15_final','liddlefish/privacy_embedding_rag_10k_base_12_final','fine-tuned/BAAI_bge-m3-6122024-ibs3-webapp','fine-tuned/BAAI_bge-m3-2024__6__12_-1217-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-es-6122024-fv1x-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6122024-bhm2-webapp','fine-tuned/BAAI_bge-large-en-v1_5-1362024-2wos-webapp','raghavlight/TDTE','fine-tuned/jinaai_jina-embeddings-v2-base-en-6132024-wvrg-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6132024-bez1-webapp','fine
-tuned/BAAI_bge-large-en-1362024-gcw6-webapp','fine-tuned/BAAI_bge-base-en-1362024-n19c-webapp','fine-tuned/BAAI_bge-m3-1362024-m82b-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6142024-huet-webapp','fine-tuned/BAAI_bge-m3-6142024-0ndt-webapp','fine-tuned/BAAI_bge-large-en-v1_5-14062024-fimj-webapp','Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka','CAiRE/UniVaR-lambda-80','CAiRE/UniVaR-lambda-20','CAiRE/UniVaR-lambda-5','CAiRE/UniVaR-lambda-1','fine-tuned/BAAI_bge-large-en-v1_5-14062024-xdwa-webapp','Salesforce/SFR-Embedding-2_R','fine-tuned/BAAI_bge-large-en-15062024-atex-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-2024615-ioyu-webapp','ILKT/2024-06-15_10-09-42','Alibaba-NLP/gte-Qwen2-7B-instruct','fine-tuned/BAAI_bge-large-en-v1_5-1562024-to89-webapp','Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet','fine-tuned/jinaai_jina-embeddings-v2-base-en-6162024-xxse-webapp','Omartificial-Intelligence-Space/Arabic-labse-Matryoshka','Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka','Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka','ILKT/2024-06-17_21-37-12','fine-tuned/BAAI_bge-small-en-v1_5-18062024-56t5-webapp','ILKT/2024-06-19_08-22-22','ILKT/2024-06-19_10-03-38','fine-tuned/jinaai_jina-embeddings-v2-base-en-6192024-56os-webapp','ILKT/2024-06-19_21-12-17','ILKT/2024-06-19_22-27-15','ILKT/2024-06-19_22-23-38','fine-tuned/jinaai_jina-embeddings-v2-base-en-20062024-djhb-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-20062024-t2n9-webapp','ILKT/2024-06-20_12-31-59','fine-tuned/BAAI_bge-large-en-2062024-u43q-webapp','ILKT/2024-06-20_12-31-55','tomaarsen/jina-clip-v1-st','tomaarsen/jina-clip-v1-st-remote','fine-tuned/jinaai_jina-embeddings-v2-base-en-6212024-p8j6-webapp','ILKT/2024-06-22_12-37-29_epoch_1','ILKT/2024-06-22_12-37-29_epoch_2','ILKT/2024-06-22_12-37-29_epoch_3','ILKT/2024-06-22_12-37-29_epoch_4','ILKT/2024-06-22_12-37-29_epoch_5','fine-tuned/jinaai_jina-embeddings-
v2-base-es-22062024-taeu-webapp','ILKT/2024-06-22_12-37-29_epoch_6','ILKT/2024-06-22_12-37-29_epoch_7','ILKT/2024-06-22_12-37-29_epoch_8','ILKT/2024-06-22_12-37-29_epoch_9','ILKT/2024-06-22_12-37-29_epoch_10','ILKT/2024-06-22_12-37-29_epoch_11','ILKT/2024-06-22_12-37-29_epoch_12','fine-tuned/jinaai_jina-embeddings-v2-base-en-6232024-zldx-webapp','ILKT/2024-06-22_12-37-29_epoch_13','ILKT/2024-06-22_12-37-29_epoch_14','ILKT/2024-06-23_09-09-07_epoch_1','ILKT/2024-06-22_12-37-29_epoch_15','ILKT/2024-06-23_09-09-07_epoch_2','ILKT/2024-06-23_09-09-07_epoch_3','ILKT/2024-06-23_09-09-07_epoch_4','ILKT/2024-06-23_09-09-07_epoch_5','ILKT/2024-06-23_09-09-07_epoch_6','ILKT/2024-06-23_09-09-07_epoch_7','ILKT/2024-06-23_09-09-07_epoch_8','fine-tuned/BAAI_bge-m3-6232024-4vtf-webapp','ILKT/2024-06-23_09-09-07_epoch_9','ILKT/2024-06-24_00-11-56_epoch_1','ILKT/2024-06-23_09-09-07_epoch_10','ILKT/2024-06-24_00-11-56_epoch_2','ILKT/2024-06-23_09-09-07_epoch_11','ILKT/2024-06-24_00-11-56_epoch_3','ILKT/2024-06-24_00-11-56_epoch_4','ILKT/2024-06-23_09-09-07_epoch_12','ILKT/2024-06-24_00-11-56_epoch_5','ILKT/2024-06-23_09-09-07_epoch_13','ILKT/2024-06-24_00-11-56_epoch_6','ILKT/2024-06-24_00-11-56_epoch_7','Lajavaness/bilingual-embedding-large','fine-tuned/jinaai_jina-embeddings-v2-base-en-24_06_2024-lrip-webapp','ILKT/2024-06-24_22-31-18_epoch_1','ILKT/2024-06-24_22-31-28_epoch_1','ILKT/2024-06-24_22-31-18_epoch_2','ILKT/2024-06-24_22-31-28_epoch_2','ILKT/2024-06-24_22-31-18_epoch_3','ILKT/2024-06-24_22-31-28_epoch_3','ILKT/2024-06-24_22-31-18_epoch_4','ILKT/2024-06-24_22-31-28_epoch_4','ILKT/2024-06-24_22-31-18_epoch_5','ILKT/2024-06-24_22-31-28_epoch_5','ILKT/2024-06-24_22-31-18_epoch_6','ILKT/2024-06-24_22-31-28_epoch_6','ILKT/2024-06-24_22-31-18_epoch_7','ILKT/2024-06-24_22-31-28_epoch_7','ILKT/2024-06-24_22-31-18_epoch_8','ILKT/2024-06-24_22-31-28_epoch_8','ILKT/2024-06-24_22-31-18_epoch_9','ILKT/2024-06-24_22-31-28_epoch_9','ILKT/2024-06-24_22-31-18_epoch_10','ILKT/2024-06-24_22-
31-28_epoch_10','ILKT/2024-06-24_22-31-18_epoch_11','ILKT/2024-06-24_22-31-28_epoch_11','ILKT/2024-06-24_22-31-18_epoch_12','ILKT/2024-06-24_22-31-28_epoch_12','ILKT/2024-06-24_22-31-18_epoch_13','ILKT/2024-06-24_22-31-28_epoch_13','ILKT/2024-06-24_22-31-18_epoch_14','ILKT/2024-06-24_22-31-28_epoch_14','ILKT/2024-06-24_22-31-18_epoch_15','ILKT/2024-06-24_22-31-28_epoch_15','ILKT/2024-06-24_22-31-18_epoch_16','ILKT/2024-06-24_22-31-28_epoch_16','ILKT/2024-06-24_22-31-18_epoch_17','ILKT/2024-06-24_22-31-28_epoch_17','ILKT/2024-06-24_22-31-18_epoch_18','ILKT/2024-06-24_22-31-28_epoch_18','ILKT/2024-06-24_22-31-18_epoch_19','ILKT/2024-06-24_22-31-28_epoch_19','ILKT/2024-06-24_22-31-18_epoch_20','ILKT/2024-06-24_22-31-28_epoch_20','ILKT/2024-06-24_22-31-18_epoch_21','ILKT/2024-06-24_22-31-28_epoch_21','ILKT/2024-06-24_22-31-18_epoch_22','ILKT/2024-06-24_22-31-28_epoch_22','ILKT/2024-06-24_22-31-18_epoch_23','ILKT/2024-06-24_22-31-28_epoch_23','ILKT/2024-06-24_22-31-18_epoch_24','ILKT/2024-06-24_22-31-28_epoch_24','ILKT/2024-06-24_22-31-18_epoch_25','ILKT/2024-06-24_22-31-28_epoch_25','ILKT/2024-06-24_22-31-18_epoch_26','ILKT/2024-06-24_22-31-28_epoch_26','ILKT/2024-06-24_22-31-18_epoch_27','ILKT/2024-06-24_22-31-28_epoch_27','ILKT/2024-06-24_22-31-18_epoch_28','ILKT/2024-06-24_22-31-28_epoch_28','ILKT/2024-06-24_22-31-18_epoch_29','ILKT/2024-06-24_22-31-28_epoch_29','ILKT/2024-06-24_22-31-18_epoch_30','Lenovo-Zhihui/Zhihui_LLM_Embedding','ILKT/2024-06-24_22-31-28_epoch_30','ILKT/2024-06-24_22-31-18_epoch_31','ILKT/2024-06-24_22-31-28_epoch_31','ILKT/2024-06-24_22-31-18_epoch_32','ILKT/2024-06-24_22-31-28_epoch_32','ILKT/2024-06-24_22-31-18_epoch_33','ILKT/2024-06-24_22-31-28_epoch_33','ILKT/2024-06-24_22-31-18_epoch_34','ILKT/2024-06-24_22-31-28_epoch_34','ILKT/2024-06-24_22-31-18_epoch_35','ILKT/2024-06-24_22-31-28_epoch_35','ILKT/2024-06-24_22-31-18_epoch_36','ILKT/2024-06-24_22-31-28_epoch_36','ILKT/2024-06-24_22-31-18_epoch_37','ILKT/2024-06-24_22-31-28_epoch_37','IL
KT/2024-06-24_22-31-18_epoch_38','ILKT/2024-06-24_22-31-28_epoch_38','ILKT/2024-06-24_22-31-18_epoch_39','ILKT/2024-06-24_22-31-28_epoch_39','ILKT/2024-06-24_22-31-18_epoch_40','ILKT/2024-06-24_22-31-28_epoch_40','ILKT/2024-06-24_22-31-18_epoch_41','ILKT/2024-06-24_22-31-28_epoch_41','ILKT/2024-06-24_22-31-18_epoch_42','ILKT/2024-06-24_22-31-28_epoch_42','ILKT/2024-06-24_22-31-18_epoch_43','ILKT/2024-06-24_22-31-28_epoch_43','ILKT/2024-06-24_22-31-18_epoch_44','ILKT/2024-06-24_22-31-28_epoch_44','ILKT/2024-06-24_22-31-18_epoch_45','ILKT/2024-06-24_22-31-28_epoch_45','ILKT/2024-06-24_22-31-18_epoch_46','ILKT/2024-06-24_22-31-28_epoch_46','ILKT/2024-06-24_22-31-18_epoch_47','ILKT/2024-06-24_22-31-28_epoch_47','ILKT/2024-06-24_22-31-18_epoch_48','ILKT/2024-06-24_22-31-28_epoch_48','ILKT/2024-06-24_22-31-18_epoch_49','ILKT/2024-06-24_22-31-28_epoch_49','ILKT/2024-06-24_22-31-18_epoch_50','ILKT/2024-06-24_22-31-28_epoch_50','ILKT/2024-06-24_22-31-18_epoch_51','ILKT/2024-06-24_22-31-28_epoch_51','ILKT/2024-06-24_22-31-18_epoch_52','ILKT/2024-06-24_22-31-28_epoch_52','ILKT/2024-06-24_22-31-18_epoch_53','ILKT/2024-06-24_22-31-28_epoch_53','ILKT/2024-06-24_22-31-18_epoch_54','ILKT/2024-06-24_22-31-28_epoch_54','ILKT/2024-06-24_22-31-18_epoch_55','ILKT/2024-06-24_22-31-28_epoch_55','ILKT/2024-06-24_22-31-18_epoch_56','ILKT/2024-06-24_22-31-28_epoch_56','ILKT/2024-06-24_22-31-18_epoch_57','ILKT/2024-06-24_22-31-28_epoch_57','ILKT/2024-06-24_22-31-18_epoch_58','ILKT/2024-06-24_22-31-28_epoch_58','ILKT/2024-06-24_22-31-18_epoch_59','ILKT/2024-06-24_22-31-28_epoch_59','ILKT/2024-06-24_22-31-18_epoch_60','ILKT/2024-06-24_22-31-28_epoch_60','ILKT/2024-06-24_22-31-18_epoch_61','ILKT/2024-06-24_22-31-28_epoch_61','ILKT/2024-06-24_22-31-18_epoch_62','ILKT/2024-06-24_22-31-28_epoch_62','ILKT/2024-06-24_22-31-18_epoch_63','ILKT/2024-06-24_22-31-28_epoch_63','ILKT/2024-06-24_22-31-18_epoch_64','ILKT/2024-06-24_22-31-28_epoch_64','ILKT/2024-06-24_22-31-18_epoch_65','ILKT/2024-06-24_22-31-
28_epoch_65','ILKT/2024-06-24_22-31-18_epoch_66','ILKT/2024-06-24_22-31-28_epoch_66','ILKT/2024-06-24_22-31-18_epoch_67','Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet','ILKT/2024-06-24_22-31-28_epoch_67','ILKT/2024-06-24_22-31-18_epoch_68','ILKT/2024-06-24_22-31-28_epoch_68','ILKT/2024-06-24_22-31-18_epoch_69','ILKT/2024-06-24_22-31-28_epoch_69','ILKT/2024-06-24_22-31-18_epoch_70','ILKT/2024-06-24_22-31-28_epoch_70','ILKT/2024-06-24_22-31-18_epoch_71','ILKT/2024-06-24_22-31-28_epoch_71','ILKT/2024-06-24_22-31-18_epoch_72','ILKT/2024-06-24_22-31-28_epoch_72','ILKT/2024-06-24_22-31-18_epoch_73','ILKT/2024-06-24_22-31-28_epoch_73','ILKT/2024-06-24_22-31-18_epoch_74','ILKT/2024-06-24_22-31-28_epoch_74','ILKT/2024-06-24_22-31-18_epoch_75','Intel/neural-embedding-v1','ILKT/2024-06-24_22-31-28_epoch_75','fine-tuned/BAAI_bge-m3-26062024-gdon-webapp','Lajavaness/bilingual-embedding-base','fine-tuned/jinaai_jina-embeddings-v2-base-es-6262024-yjwm-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6262024-wtkc-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6272024-qn9b-webapp','BeastyZ/e5-R-mistral-7b','ILKT/2024-06-23_09-09-07_epoch_14','ILKT/2024-06-23_09-09-07_epoch_15','ILKT/2024-06-23_09-09-07_epoch_16','ILKT/2024-06-23_09-09-07_epoch_17','ILKT/2024-06-23_09-09-07_epoch_18','ILKT/2024-06-23_09-09-07_epoch_19','ILKT/2024-06-23_09-09-07_epoch_20','ILKT/2024-06-23_09-09-07_epoch_21','ILKT/2024-06-23_09-09-07_epoch_22','ILKT/2024-06-23_09-09-07_epoch_23','ILKT/2024-06-23_09-09-07_epoch_24','ILKT/2024-06-23_09-09-07_epoch_25','ILKT/2024-06-23_09-09-07_epoch_26','ILKT/2024-06-23_09-09-07_epoch_27','ILKT/2024-06-23_09-09-07_epoch_28','ILKT/2024-06-23_09-09-07_epoch_29','ILKT/2024-06-23_09-09-07_epoch_30','ILKT/2024-06-23_09-09-07_epoch_31','ILKT/2024-06-23_09-09-07_epoch_32','ILKT/2024-06-23_09-09-07_epoch_33','ILKT/2024-06-23_09-09-07_epoch_34','ILKT/2024-06-23_09-09-07_epoch_35','ILKT/2024-06-23_09-09-07_epoch_36','ILKT/2024-06-23_09-09-07_epo
ch_37','ILKT/2024-06-23_09-09-07_epoch_38','ILKT/2024-06-23_09-09-07_epoch_39','ILKT/2024-06-23_09-09-07_epoch_40','ILKT/2024-06-23_09-09-07_epoch_41','ILKT/2024-06-23_09-09-07_epoch_42','ILKT/2024-06-23_09-09-07_epoch_43','ILKT/2024-06-23_09-09-07_epoch_44','ILKT/2024-06-23_09-09-07_epoch_45','ILKT/2024-06-23_09-09-07_epoch_46','ILKT/2024-06-23_09-09-07_epoch_47','ILKT/2024-06-23_09-09-07_epoch_48','ILKT/2024-06-23_09-09-07_epoch_49','ILKT/2024-06-23_09-09-07_epoch_50','ILKT/2024-06-23_09-09-07_epoch_51','ILKT/2024-06-23_09-09-07_epoch_52','ILKT/2024-06-23_09-09-07_epoch_53','ILKT/2024-06-23_09-09-07_epoch_54','ILKT/2024-06-23_09-09-07_epoch_55','ILKT/2024-06-23_09-09-07_epoch_56','ILKT/2024-06-23_09-09-07_epoch_57','ILKT/2024-06-23_09-09-07_epoch_58','ILKT/2024-06-23_09-09-07_epoch_59','ILKT/2024-06-23_09-09-07_epoch_60','ILKT/2024-06-23_09-09-07_epoch_61','ILKT/2024-06-23_09-09-07_epoch_62','ILKT/2024-06-23_09-09-07_epoch_63','ILKT/2024-06-23_09-09-07_epoch_64','ILKT/2024-06-23_09-09-07_epoch_65','ILKT/2024-06-23_09-09-07_epoch_66','ILKT/2024-06-23_09-09-07_epoch_67','ILKT/2024-06-23_09-09-07_epoch_68','ILKT/2024-06-23_09-09-07_epoch_69','ILKT/2024-06-23_09-09-07_epoch_70','ILKT/2024-06-23_09-09-07_epoch_71','ILKT/2024-06-23_09-09-07_epoch_72','ILKT/2024-06-23_09-09-07_epoch_73','ILKT/2024-06-23_09-09-07_epoch_74','ILKT/2024-06-23_09-09-07_epoch_75','Pekarnick/e5-large-v2-Q4_K_M-GGUF','ILKT/2024-06-24_00-11-56_epoch_8','ILKT/2024-06-24_00-11-56_epoch_9','ILKT/2024-06-24_00-11-56_epoch_10','ILKT/2024-06-24_00-11-56_epoch_11','ILKT/2024-06-24_00-11-56_epoch_12','ILKT/2024-06-24_00-11-56_epoch_13','ILKT/2024-06-24_00-11-56_epoch_14','ILKT/2024-06-24_00-11-56_epoch_15','ILKT/2024-06-24_00-11-56_epoch_16','ILKT/2024-06-24_00-11-56_epoch_17','ILKT/2024-06-24_00-11-56_epoch_18','ILKT/2024-06-24_00-11-56_epoch_19','ILKT/2024-06-24_00-11-56_epoch_20','ILKT/2024-06-24_00-11-56_epoch_21','ILKT/2024-06-24_00-11-56_epoch_22','ILKT/2024-06-24_00-11-56_epoch_23','ILKT/2024-06-2
4_00-11-56_epoch_24','ILKT/2024-06-24_00-11-56_epoch_25','ILKT/2024-06-24_00-11-56_epoch_26','ILKT/2024-06-24_00-11-56_epoch_27','ILKT/2024-06-24_00-11-56_epoch_28','ILKT/2024-06-24_00-11-56_epoch_29','ILKT/2024-06-24_00-11-56_epoch_30','ILKT/2024-06-24_00-11-56_epoch_31','ILKT/2024-06-24_00-11-56_epoch_32','ILKT/2024-06-24_00-11-56_epoch_33','ILKT/2024-06-24_00-11-56_epoch_34','ILKT/2024-06-24_00-11-56_epoch_35','ILKT/2024-06-24_00-11-56_epoch_36','ILKT/2024-06-24_00-11-56_epoch_37','ILKT/2024-06-24_00-11-56_epoch_38','ILKT/2024-06-24_00-11-56_epoch_39','ILKT/2024-06-24_00-11-56_epoch_40','ILKT/2024-06-24_00-11-56_epoch_41','ILKT/2024-06-24_00-11-56_epoch_42','ILKT/2024-06-24_00-11-56_epoch_43','ILKT/2024-06-24_00-11-56_epoch_44','ILKT/2024-06-24_00-11-56_epoch_45','ILKT/2024-06-24_00-11-56_epoch_46','ILKT/2024-06-24_00-11-56_epoch_47','ILKT/2024-06-24_00-11-56_epoch_48','ILKT/2024-06-24_00-11-56_epoch_49','ILKT/2024-06-24_00-11-56_epoch_50','ILKT/2024-06-24_00-11-56_epoch_51','ILKT/2024-06-24_00-11-56_epoch_52','ILKT/2024-06-24_00-11-56_epoch_53','ILKT/2024-06-24_00-11-56_epoch_54','ILKT/2024-06-24_00-11-56_epoch_55','ILKT/2024-06-24_00-11-56_epoch_56','ILKT/2024-06-24_00-11-56_epoch_57','ILKT/2024-06-24_00-11-56_epoch_58','ILKT/2024-06-24_00-11-56_epoch_59','ILKT/2024-06-24_00-11-56_epoch_60','ILKT/2024-06-24_00-11-56_epoch_61','ILKT/2024-06-24_00-11-56_epoch_62','ILKT/2024-06-24_00-11-56_epoch_63','ILKT/2024-06-24_00-11-56_epoch_64','ILKT/2024-06-24_00-11-56_epoch_65','ILKT/2024-06-24_00-11-56_epoch_66','ILKT/2024-06-24_00-11-56_epoch_67','ILKT/2024-06-24_00-11-56_epoch_68','ILKT/2024-06-24_00-11-56_epoch_69','ILKT/2024-06-24_00-11-56_epoch_70','ILKT/2024-06-24_00-11-56_epoch_71','ILKT/2024-06-24_00-11-56_epoch_72','ILKT/2024-06-24_00-11-56_epoch_73','ILKT/2024-06-24_00-11-56_epoch_74','ILKT/2024-06-24_00-11-56_epoch_75','ILKT/2024-06-22_12-37-29_epoch_16','ILKT/2024-06-22_12-37-29_epoch_17','ILKT/2024-06-22_12-37-29_epoch_18','ILKT/2024-06-22_12-37-29_epoch_19'
,'ILKT/2024-06-22_12-37-29_epoch_20','ILKT/2024-06-22_12-37-29_epoch_21','ILKT/2024-06-22_12-37-29_epoch_22','ILKT/2024-06-22_12-37-29_epoch_23','ILKT/2024-06-22_12-37-29_epoch_24','ILKT/2024-06-22_12-37-29_epoch_25','ILKT/2024-06-22_12-37-29_epoch_26','ILKT/2024-06-22_12-37-29_epoch_27','ILKT/2024-06-22_12-37-29_epoch_28','ILKT/2024-06-22_12-37-29_epoch_29','ILKT/2024-06-22_12-37-29_epoch_30','ILKT/2024-06-22_12-37-29_epoch_31','ILKT/2024-06-22_12-37-29_epoch_32','ILKT/2024-06-22_12-37-29_epoch_33','ILKT/2024-06-22_12-37-29_epoch_34','ILKT/2024-06-22_12-37-29_epoch_35','ILKT/2024-06-22_12-37-29_epoch_36','ILKT/2024-06-22_12-37-29_epoch_37','ILKT/2024-06-22_12-37-29_epoch_38','ILKT/2024-06-22_12-37-29_epoch_39','ILKT/2024-06-22_12-37-29_epoch_40','ILKT/2024-06-22_12-37-29_epoch_41','ILKT/2024-06-22_12-37-29_epoch_42','ILKT/2024-06-22_12-37-29_epoch_43','ILKT/2024-06-22_12-37-29_epoch_44','ILKT/2024-06-22_12-37-29_epoch_45','ILKT/2024-06-22_12-37-29_epoch_46','ILKT/2024-06-22_12-37-29_epoch_47','ILKT/2024-06-22_12-37-29_epoch_48','ILKT/2024-06-22_12-37-29_epoch_49','ILKT/2024-06-22_12-37-29_epoch_50','ILKT/2024-06-22_12-37-29_epoch_51','ILKT/2024-06-22_12-37-29_epoch_52','ILKT/2024-06-22_12-37-29_epoch_53','ILKT/2024-06-22_12-37-29_epoch_54','ILKT/2024-06-22_12-37-29_epoch_55','ILKT/2024-06-22_12-37-29_epoch_56','ILKT/2024-06-22_12-37-29_epoch_57','ILKT/2024-06-22_12-37-29_epoch_58','ILKT/2024-06-22_12-37-29_epoch_59','ILKT/2024-06-22_12-37-29_epoch_60','ILKT/2024-06-22_12-37-29_epoch_61','ILKT/2024-06-22_12-37-29_epoch_62','ILKT/2024-06-22_12-37-29_epoch_63','ILKT/2024-06-22_12-37-29_epoch_64','ILKT/2024-06-22_12-37-29_epoch_65','ILKT/2024-06-22_12-37-29_epoch_66','ILKT/2024-06-22_12-37-29_epoch_67','ILKT/2024-06-22_12-37-29_epoch_68','ILKT/2024-06-22_12-37-29_epoch_69','ILKT/2024-06-22_12-37-29_epoch_70','ILKT/2024-06-22_12-37-29_epoch_71','ILKT/2024-06-22_12-37-29_epoch_72','ILKT/2024-06-22_12-37-29_epoch_73','ILKT/2024-06-22_12-37-29_epoch_74','ILKT/2024-06-22_12
-37-29_epoch_75','Lajavaness/bilingual-embedding-large-8k','Alibaba-NLP/gte-Qwen2-1.5B-instruct','Jaume/gemma-2b-embeddings','lier007/xiaobu-embedding-v2','chihlunLee/NoInstruct-small-Embedding-v0-Q4_0-GGUF','fine-tuned/jinaai_jina-embeddings-v2-base-es-472024-aqk1-webapp','second-state/gte-Qwen2-1.5B-instruct-GGUF','gaianet/gte-Qwen2-1.5B-instruct-GGUF','yco/bilingual-embedding-base','fine-tuned/jinaai_jina-embeddings-v2-base-en-05072024-aj6g-webapp','AbderrahmanSkiredj1/arabic_text_embedding_sts_arabertv02_arabicnlitriplet','AbderrahmanSkiredj1/Arabic_text_embedding_for_sts','dimcha/mxbai-embed-large-v1-Q4_K_M-GGUF','fine-tuned/BAAI_bge-m3-782024-wl54-webapp','nvidia/NV-Retriever-v1','fine-tuned/jinaai_jina-embeddings-v2-base-en-792024-tyen-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-11072024-bh6v-webapp','archit28/bge-large-en-v1.5-Q4_K_S-GGUF','dunzhang/stella_en_1.5B_v5','dunzhang/stella_en_400M_v5','niancheng/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF','niancheng/gte-Qwen2-7B-instruct-Q4_K_M-GGUF','fine-tuned/jinaai_jina-embeddings-v2-base-en-15072024-5xy1-webapp','fine-tuned/BAAI_bge-small-en-v1_5-7152024-w1z0-webapp', 
'Cohere/Cohere-embed-english-v3.0','Cohere/Cohere-embed-english-v3.0','Cohere/Cohere-embed-multilingual-light-v3.0','Cohere/Cohere-embed-multilingual-v3.0','vesteinn/DanskBERT','jhu-clsp/FollowIR-7B','GritLM/GritLM-7B','GritLM/GritLM-7B','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse','sentence-transformers/LaBSE','Linq-AI-Research/Linq-Embed-Mistral','nvidia/NV-Embed-v1','nvidia/NV-Retriever-v1','Salesforce/SFR-Embedding-Mistral','sentence-transformers/all-MiniLM-L12-v2','sentence-transformers/all-MiniLM-L12-v2','sentence-transformers/all-MiniLM-L6-v2','sentence-transformers/all-MiniLM-L6-v2','sentence-transformers/all-mpnet-base-v2','sentence-transformers/all-mpnet-base-v2','sentence-transformers/allenai-specter','Geotrend/bert-base-10lang-cased','Geotrend/bert-base-15lang-cased','Geotrend/bert-base-25lang-cased','google-bert/bert-base-multilingual-cased','google-bert/bert-base-multilingual-uncased','KB/bert-base-swedish-cased','bert-base-uncased','BAAI/bge-base-en-v1.5','BAAI/bge-base-en-v1.5','BAAI/bge-base-zh-v1.5','BAAI/bge-large-en-v1.5','BAAI/bge-large-en-v1.5','BAAI/bge-large-zh-noinstruct','BAAI/bge-large-zh-v1.5','BAAI/bge-m3','BAAI/bge-m3','BAAI/bge-small-en-v1.5','BAAI/bge-small-en-v1.5','BAAI/bge-small-zh-v1.5','almanach/camembert-base','almanach/camembert-large','nthakur/contriever-base-msmarco','facebook/contriever','facebook/contriever','T-Systems-onsite/cross-en-de-roberta-sentence-transformer','chcaa/dfm-encoder-large-v1','chcaa/dfm-encoder-large-v1','Geotrend/distilbert-base-25lang-cased','Geotrend/distilbert-base-en-fr-cased','Geotrend
/distilbert-base-en-fr-es-pt-it-cased','Geotrend/distilbert-base-fr-cased','distilbert-base-uncased','sentence-transformers/distiluse-base-multilingual-cased-v2','dwzhu/e5-base-4k','intfloat/e5-base-v2','intfloat/e5-base','intfloat/e5-large-v2','intfloat/e5-large','intfloat/e5-mistral-7b-instruct','intfloat/e5-mistral-7b-instruct-noinstruct','intfloat/e5-small','jonfd/electra-small-nordic','KBLab/electra-small-swedish-cased-discriminator','google/flan-t5-base','google/flan-t5-large','flaubert/flaubert_base_cased','flaubert/flaubert_base_uncased','flaubert/flaubert_large_cased','deepset/gbert-base','deepset/gbert-large','deepset/gelectra-base','deepset/gelectra-large','sentence-transformers/average_word_embeddings_glove.6B.300d','uklfr/gottbert-base','Alibaba-NLP/gte-Qwen1.5-7B-instruct','Alibaba-NLP/gte-Qwen2-7B-instruct','sentence-transformers/gtr-t5-base','sentence-transformers/gtr-t5-large','sentence-transformers/gtr-t5-xl','sentence-transformers/gtr-t5-xxl','ipipan/herbert-base-retrieval-v2','hkunlp/instructor-base','hkunlp/instructor-large','hkunlp/instructor-xl','jinaai/jina-embeddings-v2-base-en','sentence-transformers/average_word_embeddings_komninos','meta-llama/Llama-2-7b-chat-hf','silk-road/luotuo-bert-medium','moka-ai/m3e-base','moka-ai/m3e-large','mistralai/Mistral-7B-Instruct-v0.2','castorini/monobert-large-msmarco','castorini/monot5-3b-msmarco-10k','castorini/monot5-base-msmarco-10k','sentence-transformers/msmarco-bert-co-condensor','sentence-transformers/multi-qa-MiniLM-L6-cos-v1','intfloat/multilingual-e5-base','intfloat/multilingual-e5-large','intfloat/multilingual-e5-small','NbAiLab/nb-bert-base','NbAiLab/nb-bert-large','nomic-ai/nomic-embed-text-v1','nomic-ai/nomic-embed-text-v1.5','nomic-ai/nomic-embed-text-v1.5','nomic-ai/nomic-embed-text-v1.5','nomic-ai/nomic-embed-text-v1.5','ltg/norbert3-base','ltg/norbert3-large','sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2','sentence-transformers/paraphrase-multilingual-mpnet-base-v2','KBLa
b/sentence-bert-swedish-cased','dangvantuan/sentence-camembert-base','dangvantuan/sentence-camembert-large','Wissam42/sentence-croissant-llm-base','sentence-transformers/sentence-t5-base','sentence-transformers/sentence-t5-large','sentence-transformers/sentence-t5-xl','sentence-transformers/sentence-t5-xxl','ipipan/silver-retriever-base-v1','sdadas/st-polish-paraphrase-from-distilroberta','sdadas/st-polish-paraphrase-from-mpnet','princeton-nlp/sup-simcse-bert-base-uncased','orionweller/tart-dual-contriever-msmarco','facebook/tart-full-flan-t5-xl','shibing624/text2vec-base-chinese','GanymedeNil/text2vec-large-chinese','izhx/udever-bloom-1b1','izhx/udever-bloom-560m','vprelovac/universal-sentence-encoder-multilingual-3','vprelovac/universal-sentence-encoder-multilingual-large-3','princeton-nlp/unsup-simcse-bert-base-uncased','sentence-transformers/use-cmlm-multilingual','xlm-roberta-base','xlm-roberta-large']
# Possible changes:
# - Could add graphs / other visual content
# - Could add verification marks