diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,40 +1,1263 @@ -from functools import reduce +from functools import partial, reduce +import json +import os import re +from datasets import load_dataset import gradio as gr +from huggingface_hub import HfApi, hf_hub_download +from huggingface_hub.repocard import metadata_load import pandas as pd +from tqdm.autonotebook import tqdm -from envs import REPO_ID -from refresh import BOARDS_CONFIG, TASKS, TASKS_CONFIG, TASK_DESCRIPTIONS, PRETTY_NAMES, load_results, make_clickable_model -from refresh import PROPRIETARY_MODELS, SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS, CROSS_ENCODERS, BI_ENCODERS, INSTRUCT_MODELS, NOINSTRUCT_MODELS, EXTERNAL_MODEL_TO_LINK +from utils.model_size import get_model_parameters_memory +TASKS = [ + "BitextMining", + "Classification", + "Clustering", + "PairClassification", + "Reranking", + "Retrieval", + "STS", + "Summarization", +] + +TASK_LIST_BITEXT_MINING = ['BUCC (de-en)', 'BUCC (fr-en)', 'BUCC (ru-en)', 'BUCC (zh-en)', 'Tatoeba (afr-eng)', 'Tatoeba (amh-eng)', 'Tatoeba (ang-eng)', 'Tatoeba (ara-eng)', 'Tatoeba (arq-eng)', 'Tatoeba (arz-eng)', 'Tatoeba (ast-eng)', 'Tatoeba (awa-eng)', 'Tatoeba (aze-eng)', 'Tatoeba (bel-eng)', 'Tatoeba (ben-eng)', 'Tatoeba (ber-eng)', 'Tatoeba (bos-eng)', 'Tatoeba (bre-eng)', 'Tatoeba (bul-eng)', 'Tatoeba (cat-eng)', 'Tatoeba (cbk-eng)', 'Tatoeba (ceb-eng)', 'Tatoeba (ces-eng)', 'Tatoeba (cha-eng)', 'Tatoeba (cmn-eng)', 'Tatoeba (cor-eng)', 'Tatoeba (csb-eng)', 'Tatoeba (cym-eng)', 'Tatoeba (dan-eng)', 'Tatoeba (deu-eng)', 'Tatoeba (dsb-eng)', 'Tatoeba (dtp-eng)', 'Tatoeba (ell-eng)', 'Tatoeba (epo-eng)', 'Tatoeba (est-eng)', 'Tatoeba (eus-eng)', 'Tatoeba (fao-eng)', 'Tatoeba (fin-eng)', 'Tatoeba (fra-eng)', 'Tatoeba (fry-eng)', 'Tatoeba (gla-eng)', 'Tatoeba (gle-eng)', 'Tatoeba (glg-eng)', 'Tatoeba (gsw-eng)', 'Tatoeba (heb-eng)', 'Tatoeba (hin-eng)', 'Tatoeba (hrv-eng)', 'Tatoeba (hsb-eng)', 'Tatoeba (hun-eng)', 'Tatoeba (hye-eng)', 'Tatoeba 
(ido-eng)', 'Tatoeba (ile-eng)', 'Tatoeba (ina-eng)', 'Tatoeba (ind-eng)', 'Tatoeba (isl-eng)', 'Tatoeba (ita-eng)', 'Tatoeba (jav-eng)', 'Tatoeba (jpn-eng)', 'Tatoeba (kab-eng)', 'Tatoeba (kat-eng)', 'Tatoeba (kaz-eng)', 'Tatoeba (khm-eng)', 'Tatoeba (kor-eng)', 'Tatoeba (kur-eng)', 'Tatoeba (kzj-eng)', 'Tatoeba (lat-eng)', 'Tatoeba (lfn-eng)', 'Tatoeba (lit-eng)', 'Tatoeba (lvs-eng)', 'Tatoeba (mal-eng)', 'Tatoeba (mar-eng)', 'Tatoeba (max-eng)', 'Tatoeba (mhr-eng)', 'Tatoeba (mkd-eng)', 'Tatoeba (mon-eng)', 'Tatoeba (nds-eng)', 'Tatoeba (nld-eng)', 'Tatoeba (nno-eng)', 'Tatoeba (nob-eng)', 'Tatoeba (nov-eng)', 'Tatoeba (oci-eng)', 'Tatoeba (orv-eng)', 'Tatoeba (pam-eng)', 'Tatoeba (pes-eng)', 'Tatoeba (pms-eng)', 'Tatoeba (pol-eng)', 'Tatoeba (por-eng)', 'Tatoeba (ron-eng)', 'Tatoeba (rus-eng)', 'Tatoeba (slk-eng)', 'Tatoeba (slv-eng)', 'Tatoeba (spa-eng)', 'Tatoeba (sqi-eng)', 'Tatoeba (srp-eng)', 'Tatoeba (swe-eng)', 'Tatoeba (swg-eng)', 'Tatoeba (swh-eng)', 'Tatoeba (tam-eng)', 'Tatoeba (tat-eng)', 'Tatoeba (tel-eng)', 'Tatoeba (tgl-eng)', 'Tatoeba (tha-eng)', 'Tatoeba (tuk-eng)', 'Tatoeba (tur-eng)', 'Tatoeba (tzl-eng)', 'Tatoeba (uig-eng)', 'Tatoeba (ukr-eng)', 'Tatoeba (urd-eng)', 'Tatoeba (uzb-eng)', 'Tatoeba (vie-eng)', 'Tatoeba (war-eng)', 'Tatoeba (wuu-eng)', 'Tatoeba (xho-eng)', 'Tatoeba (yid-eng)', 'Tatoeba (yue-eng)', 'Tatoeba (zsm-eng)'] +TASK_LIST_BITEXT_MINING_DA = ["BornholmBitextMining"] + +TASK_LIST_CLASSIFICATION = [ + "AmazonCounterfactualClassification (en)", + "AmazonPolarityClassification", + "AmazonReviewsClassification (en)", + "Banking77Classification", + "EmotionClassification", + "ImdbClassification", + "MassiveIntentClassification (en)", + "MassiveScenarioClassification (en)", + "MTOPDomainClassification (en)", + "MTOPIntentClassification (en)", + "ToxicConversationsClassification", + "TweetSentimentExtractionClassification", +] + +TASK_LIST_CLASSIFICATION_DA = [ + "AngryTweetsClassification", + 
"DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", + "MassiveIntentClassification (da)", + "MassiveScenarioClassification (da)", + "NordicLangClassification", + "ScalaDaClassification", +] + +TASK_LIST_CLASSIFICATION_FR = [ + "AmazonReviewsClassification (fr)", + "MasakhaNEWSClassification (fra)", + "MassiveIntentClassification (fr)", + "MassiveScenarioClassification (fr)", + "MTOPDomainClassification (fr)", + "MTOPIntentClassification (fr)", +] + +TASK_LIST_CLASSIFICATION_NB = [ + "NoRecClassification", + "NordicLangClassification", + "NorwegianParliament", + "MassiveIntentClassification (nb)", + "MassiveScenarioClassification (nb)", + "ScalaNbClassification", +] + +TASK_LIST_CLASSIFICATION_PL = [ + "AllegroReviews", + "CBD", + "MassiveIntentClassification (pl)", + "MassiveScenarioClassification (pl)", + "PAC", + "PolEmo2.0-IN", + "PolEmo2.0-OUT", +] + +TASK_LIST_CLASSIFICATION_SV = [ + "DalajClassification", + "MassiveIntentClassification (sv)", + "MassiveScenarioClassification (sv)", + "NordicLangClassification", + "ScalaSvClassification", + "SweRecClassification", +] + +TASK_LIST_CLASSIFICATION_ZH = [ + "AmazonReviewsClassification (zh)", + "IFlyTek", + "JDReview", + "MassiveIntentClassification (zh-CN)", + "MassiveScenarioClassification (zh-CN)", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", +] + +TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 
'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 
'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)'] + +TASK_LIST_CLUSTERING = [ + "ArxivClusteringP2P", + "ArxivClusteringS2S", + "BiorxivClusteringP2P", + "BiorxivClusteringS2S", + "MedrxivClusteringP2P", + "MedrxivClusteringS2S", + "RedditClustering", + "RedditClusteringP2P", + "StackExchangeClustering", + "StackExchangeClusteringP2P", + "TwentyNewsgroupsClustering", +] + + +TASK_LIST_CLUSTERING_DE = [ + "BlurbsClusteringP2P", + "BlurbsClusteringS2S", + "TenKGnadClusteringP2P", + "TenKGnadClusteringS2S", +] + +TASK_LIST_CLUSTERING_FR = [ + "AlloProfClusteringP2P", + "AlloProfClusteringS2S", + "HALClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringS2S", + 
"MasakhaNEWSClusteringP2P (fra)", + "MasakhaNEWSClusteringS2S (fra)", +] + +TASK_LIST_CLUSTERING_PL = [ + "8TagsClustering", +] + +TASK_LIST_CLUSTERING_ZH = [ + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", +] + +TASK_LIST_PAIR_CLASSIFICATION = [ + "SprintDuplicateQuestions", + "TwitterSemEval2015", + "TwitterURLCorpus", +] + +TASK_LIST_PAIR_CLASSIFICATION_FR = [ + "OpusparcusPC (fr)", + "PawsX (fr)", +] + +TASK_LIST_PAIR_CLASSIFICATION_PL = [ + "CDSC-E", + "PPC", + "PSC", + "SICK-E-PL", +] + +TASK_LIST_PAIR_CLASSIFICATION_ZH = [ + "Cmnli", + "Ocnli", +] + +TASK_LIST_RERANKING = [ + "AskUbuntuDupQuestions", + "MindSmallReranking", + "SciDocsRR", + "StackOverflowDupQuestions", +] + +TASK_LIST_RERANKING_FR = [ + "AlloprofReranking", + "SyntecReranking", +] + +TASK_LIST_RERANKING_ZH = [ + "CMedQAv1", + "CMedQAv2", + "MMarcoReranking", + "T2Reranking", +] + +TASK_LIST_RETRIEVAL = [ + "ArguAna", + "ClimateFEVER", + "CQADupstackRetrieval", + "DBPedia", + "FEVER", + "FiQA2018", + "HotpotQA", + "MSMARCO", + "NFCorpus", + "NQ", + "QuoraRetrieval", + "SCIDOCS", + "SciFact", + "Touche2020", + "TRECCOVID", +] + +TASK_LIST_RETRIEVAL_FR = [ + "AlloprofRetrieval", + "BSARDRetrieval", + "MintakaRetrieval (fr)", +# "MultiLongDocRetrieval", + "SyntecRetrieval", + "XPQARetrieval (fr)", +] + +TASK_LIST_RETRIEVAL_LAW = [ + "AILACasedocs", + "AILAStatutes", + "GerDaLIRSmall", + "LeCaRDv2", + "LegalBenchConsumerContractsQA", + "LegalBenchCorporateLobbying", + "LegalQuAD", + "LegalSummarization", +] + +TASK_LIST_RETRIEVAL_PL = [ + "ArguAna-PL", + "DBPedia-PL", + "FiQA-PL", + "HotpotQA-PL", + "MSMARCO-PL", + "NFCorpus-PL", + "NQ-PL", + "Quora-PL", + "SCIDOCS-PL", + "SciFact-PL", + "TRECCOVID-PL", +] + +TASK_LIST_RETRIEVAL_ZH = [ + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MedicalRetrieval", + "MMarcoRetrieval", + "T2Retrieval", + "VideoRetrieval", +] + +TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [ + 
"CQADupstackAndroidRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackGamingRetrieval", + "CQADupstackGisRetrieval", + "CQADupstackMathematicaRetrieval", + "CQADupstackPhysicsRetrieval", + "CQADupstackProgrammersRetrieval", + "CQADupstackStatsRetrieval", + "CQADupstackTexRetrieval", + "CQADupstackUnixRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackWordpressRetrieval" +] + +TASK_LIST_STS = [ + "BIOSSES", + "SICK-R", + "STS12", + "STS13", + "STS14", + "STS15", + "STS16", + "STS17 (en-en)", + "STS22 (en)", + "STSBenchmark", +] + +TASK_LIST_STS_FR = [ + "STS22 (fr)", + "STSBenchmarkMultilingualSTS (fr)", + "SICKFr", +] + +TASK_LIST_STS_PL = [ + "CDSC-R", + "SICK-R-PL", + "STS22 (pl)", +] + +TASK_LIST_STS_ZH = [ + "AFQMC", + "ATEC", + "BQ", + "LCQMC", + "PAWSX", + "QBQTC", + "STS22 (zh)", + "STSB", +] + +TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",] + +TASK_LIST_SUMMARIZATION = ["SummEval",] + +TASK_LIST_SUMMARIZATION_FR = ["SummEvalFr"] + +TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION +TASK_LIST_FR = TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR +TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL +TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + 
TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH + +TASK_TO_METRIC = { + "BitextMining": "f1", + "Clustering": "v_measure", + "Classification": "accuracy", + "PairClassification": "cos_sim_ap", + "Reranking": "map", + "Retrieval": "ndcg_at_10", + "STS": "cos_sim_spearman", + "Summarization": "cos_sim_spearman", +} + +def make_clickable_model(model_name, link=None): + if link is None: + link = "https://huggingface.co/" + model_name + # Remove user from model name + return ( + f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>' + ) + +# Models without metadata, thus we cannot fetch their results naturally +EXTERNAL_MODELS = [ + "Baichuan-text-embedding", + "Cohere-embed-english-v3.0", + "Cohere-embed-multilingual-v3.0", + "Cohere-embed-multilingual-light-v3.0", + "DanskBERT", + "LASER2", + "LaBSE", + "OpenSearch-text-hybrid", + "all-MiniLM-L12-v2", + "all-MiniLM-L6-v2", + "all-mpnet-base-v2", + "allenai-specter", + "bert-base-10lang-cased", + "bert-base-15lang-cased", + "bert-base-25lang-cased", + "bert-base-multilingual-cased", + "bert-base-multilingual-uncased", + "bert-base-swedish-cased", + "bert-base-uncased", + "bge-base-zh-v1.5", + "bge-large-en-v1.5", + "bge-large-zh-v1.5", + "bge-large-zh-noinstruct", + "bge-small-zh-v1.5", + "contriever-base-msmarco", + "cross-en-de-roberta-sentence-transformer", + "dfm-encoder-large-v1", + "dfm-sentence-encoder-large-1", + "distiluse-base-multilingual-cased-v2", + "e5-base", + "e5-large", + "e5-mistral-7b-instruct", + "e5-small", + "electra-small-nordic", + "electra-small-swedish-cased-discriminator", + "flaubert_base_cased", + "flaubert_base_uncased", + "flaubert_large_cased", + "gbert-base", + "gbert-large", + "gelectra-base", + "gelectra-large", + "glove.6B.300d", + "google-gecko.text-embedding-preview-0409", + "google-gecko-256.text-embedding-preview-0409", + "gottbert-base", + "gtr-t5-base", + "gtr-t5-large", + "gtr-t5-xl", + "gtr-t5-xxl", + "herbert-base-retrieval-v2", + "komninos", + 
"luotuo-bert-medium", + "m3e-base", + "m3e-large", + "mistral-embed", + "msmarco-bert-co-condensor", + "multi-qa-MiniLM-L6-cos-v1", + "multilingual-e5-base", + "multilingual-e5-large", + "multilingual-e5-small", + "nb-bert-base", + "nb-bert-large", + "nomic-embed-text-v1.5-64", + "nomic-embed-text-v1.5-128", + "nomic-embed-text-v1.5-256", + "nomic-embed-text-v1.5-512", + "norbert3-base", + "norbert3-large", + "paraphrase-multilingual-MiniLM-L12-v2", + "paraphrase-multilingual-mpnet-base-v2", + "sentence-bert-swedish-cased", + "sentence-camembert-base", + "sentence-camembert-large", + "sentence-croissant-llm-base", + "sentence-t5-base", + "sentence-t5-large", + "sentence-t5-xl", + "sentence-t5-xxl", + "silver-retriever-base-v1", + "sup-simcse-bert-base-uncased", + "st-polish-paraphrase-from-distilroberta", + "st-polish-paraphrase-from-mpnet", + "text2vec-base-chinese", + "text2vec-base-multilingual", + "text2vec-large-chinese", + "text-embedding-3-small", + "text-embedding-3-large", + "text-embedding-3-large-256", + "text-embedding-ada-002", + "text-similarity-ada-001", + "text-similarity-babbage-001", + "text-similarity-curie-001", + "text-similarity-davinci-001", + "text-search-ada-doc-001", + "text-search-ada-001", + "text-search-babbage-001", + "text-search-curie-001", + "text-search-davinci-001", + "titan-embed-text-v1", + "udever-bloom-1b1", + "udever-bloom-560m", + "universal-sentence-encoder-multilingual-3", + "universal-sentence-encoder-multilingual-large-3", + "unsup-simcse-bert-base-uncased", + "use-cmlm-multilingual", + "voyage-2", + "voyage-code-2", + "voyage-law-2", + "voyage-lite-01-instruct", + "voyage-lite-02-instruct", + "xlm-roberta-base", + "xlm-roberta-large", +] + +EXTERNAL_MODEL_TO_LINK = { + "Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0", + "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0", + "Cohere-embed-multilingual-light-v3.0": 
"https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0", + "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter", + "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2", + "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2", + "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2", + "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding", + "bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased", + "bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased", + "bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased", + "bert-base-multilingual-cased": "https://huggingface.co/google-bert/bert-base-multilingual-cased", + "bert-base-multilingual-uncased": "https://huggingface.co/google-bert/bert-base-multilingual-uncased", + "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased", + "bert-base-uncased": "https://huggingface.co/bert-base-uncased", + "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5", + "bge-large-en-v1.5": "https://huggingface.co/BAAI/bge-large-en-v1.5", + "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5", + "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct", + "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5", + "camembert-base": "https://huggingface.co/almanach/camembert-base", + "camembert-large": "https://huggingface.co/almanach/camembert-large", + "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco", + "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer", + "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT", + 
"distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased", + "distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased", + "distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased", + "distilbert-base-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-fr-cased", + "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased", + "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2", + "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1", + "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1", + "e5-base": "https://huggingface.co/intfloat/e5-base", + "e5-large": "https://huggingface.co/intfloat/e5-large", + "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct", + "e5-small": "https://huggingface.co/intfloat/e5-small", + "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic", + "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator", + "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased", + "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased", + "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased", + "gbert-base": "https://huggingface.co/deepset/gbert-base", + "gbert-large": "https://huggingface.co/deepset/gbert-large", + "gelectra-base": "https://huggingface.co/deepset/gelectra-base", + "gelectra-large": "https://huggingface.co/deepset/gelectra-large", + "glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d", + "google-gecko.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models", + 
"google-gecko-256.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models", + "gottbert-base": "https://huggingface.co/uklfr/gottbert-base", + "gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base", + "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large", + "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl", + "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl", + "herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2", + "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos", + "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium", + "LASER2": "https://github.com/facebookresearch/LASER", + "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE", + "m3e-base": "https://huggingface.co/moka-ai/m3e-base", + "m3e-large": "https://huggingface.co/moka-ai/m3e-large", + "mistral-embed": "https://docs.mistral.ai/guides/embeddings", + "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor", + "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base", + "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large", + "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small", + "nb-bert-base": "https://huggingface.co/NbAiLab/nb-bert-base", + "nb-bert-large": "https://huggingface.co/NbAiLab/nb-bert-large", + "nomic-embed-text-v1.5-64": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", + "nomic-embed-text-v1.5-128": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", + "nomic-embed-text-v1.5-256": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", + "nomic-embed-text-v1.5-512": 
"https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", + "norbert3-base": "https://huggingface.co/ltg/norbert3-base", + "norbert3-large": "https://huggingface.co/ltg/norbert3-large", + "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval", + "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2", + "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", + "sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base", + "sentence-camembert-large": "https://huggingface.co/dangvantuan/sentence-camembert-large", + "sentence-croissant-llm-base": "https://huggingface.co/Wissam42/sentence-croissant-llm-base", + "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased", + "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base", + "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large", + "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl", + "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl", + "silver-retriever-base-v1": "https://huggingface.co/ipipan/silver-retriever-base-v1", + "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased", + "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta", + "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet", + "text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese", + "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese", + "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates", + "text-embedding-3-large": 
"https://openai.com/blog/new-embedding-models-and-api-updates", + "text-embedding-3-large-256": "https://openai.com/blog/new-embedding-models-and-api-updates", + "text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model", + "text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-similarity-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-similarity-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-search-ada-doc-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-search-ada-query-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-search-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-search-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings", + "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html", + "udever-bloom-1b1": "https://huggingface.co/izhx/udever-bloom-1b1", + "udever-bloom-560m": "https://huggingface.co/izhx/udever-bloom-560m", + "universal-sentence-encoder-multilingual-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3", + "universal-sentence-encoder-multilingual-large-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3", + "unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased", + "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual", + "voyage-2": "https://docs.voyageai.com/embeddings/", + "voyage-code-2": 
"https://docs.voyageai.com/embeddings/", + "voyage-law-2": "https://docs.voyageai.com/embeddings/", + "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/", + "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/", + "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base", + "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large", +} + +EXTERNAL_MODEL_TO_DIM = { + "Cohere-embed-english-v3.0": 1024, + "Cohere-embed-multilingual-v3.0": 1024, + "Cohere-embed-multilingual-light-v3.0": 384, + "all-MiniLM-L12-v2": 384, + "all-MiniLM-L6-v2": 384, + "all-mpnet-base-v2": 768, + "allenai-specter": 768, + "Baichuan-text-embedding": 1024, + "bert-base-10lang-cased": 768, + "bert-base-15lang-cased": 768, + "bert-base-25lang-cased": 768, + "bert-base-multilingual-cased": 768, + "bert-base-multilingual-uncased": 768, + "bert-base-swedish-cased": 768, + "bert-base-uncased": 768, + "bge-base-zh-v1.5": 768, + "bge-large-en-v1.5": 1024, + "bge-large-zh-v1.5": 1024, + "bge-large-zh-noinstruct": 1024, + "bge-small-zh-v1.5": 512, + "camembert-base": 512, + "camembert-large": 768, + "contriever-base-msmarco": 768, + "cross-en-de-roberta-sentence-transformer": 768, + "DanskBERT": 768, + "distilbert-base-25lang-cased": 768, + "distilbert-base-en-fr-cased": 768, + "distilbert-base-en-fr-es-pt-it-cased": 768, + "distilbert-base-fr-cased": 768, + "distilbert-base-uncased": 768, + "distiluse-base-multilingual-cased-v2": 512, + "dfm-encoder-large-v1": 1024, + "dfm-sentence-encoder-large-1": 1024, + "e5-base": 768, + "e5-large": 1024, + "e5-mistral-7b-instruct": 4096, + "e5-small": 384, + "electra-small-nordic": 256, + "electra-small-swedish-cased-discriminator": 256, + "flaubert_base_cased": 768, + "flaubert_base_uncased": 768, + "flaubert_large_cased": 1024, + "luotuo-bert-medium": 768, + "LASER2": 1024, + "LaBSE": 768, + "gbert-base": 768, + "gbert-large": 1024, + "gelectra-base": 768, + "gelectra-large": 1024, + "glove.6B.300d": 300, + 
"google-gecko.text-embedding-preview-0409": 768, + "google-gecko-256.text-embedding-preview-0409": 256, + "gottbert-base": 768, + "gtr-t5-base": 768, + "gtr-t5-large": 768, + "gtr-t5-xl": 768, + "gtr-t5-xxl": 768, + "herbert-base-retrieval-v2": 768, + "komninos": 300, + "m3e-base": 768, + "m3e-large": 768, + "mistral-embed": 1024, + "msmarco-bert-co-condensor": 768, + "multi-qa-MiniLM-L6-cos-v1": 384, + "multilingual-e5-base": 768, + "multilingual-e5-small": 384, + "multilingual-e5-large": 1024, + "nb-bert-base": 768, + "nb-bert-large": 1024, + "nomic-embed-text-v1.5-64": 64, + "nomic-embed-text-v1.5-128": 128, + "nomic-embed-text-v1.5-256": 256, + "nomic-embed-text-v1.5-512": 512, + "norbert3-base": 768, + "norbert3-large": 1024, + "OpenSearch-text-hybrid": 1792, + "paraphrase-multilingual-MiniLM-L12-v2": 384, + "paraphrase-multilingual-mpnet-base-v2": 768, + "sentence-camembert-base": 768, + "sentence-camembert-large": 1024, + "sentence-croissant-llm-base": 2048, + "sentence-bert-swedish-cased": 768, + "sentence-t5-base": 768, + "sentence-t5-large": 768, + "sentence-t5-xl": 768, + "sentence-t5-xxl": 768, + "silver-retriever-base-v1": 768, + "sup-simcse-bert-base-uncased": 768, + "st-polish-paraphrase-from-distilroberta": 768, + "st-polish-paraphrase-from-mpnet": 768, + "text2vec-base-chinese": 768, + "text2vec-large-chinese": 1024, + "text-embedding-3-large": 3072, + "text-embedding-3-large-256": 256, + "text-embedding-3-small": 1536, + "text-embedding-ada-002": 1536, + "text-similarity-ada-001": 1024, + "text-similarity-babbage-001": 2048, + "text-similarity-curie-001": 4096, + "text-similarity-davinci-001": 12288, + "text-search-ada-doc-001": 1024, + "text-search-ada-query-001": 1024, + "text-search-ada-001": 1024, + "text-search-babbage-001": 2048, + "text-search-curie-001": 4096, + "text-search-davinci-001": 12288, + "titan-embed-text-v1": 1536, + "udever-bloom-1b1": 1536, + "udever-bloom-560m": 1024, + "universal-sentence-encoder-multilingual-3": 512, + 
"universal-sentence-encoder-multilingual-large-3": 512, + "unsup-simcse-bert-base-uncased": 768, + "use-cmlm-multilingual": 768, + "voyage-2": 1024, + "voyage-code-2": 1536, + "voyage-law-2": 1024, + "voyage-lite-01-instruct": 1024, + "voyage-lite-02-instruct": 1024, + "xlm-roberta-base": 768, + "xlm-roberta-large": 1024, +} + +EXTERNAL_MODEL_TO_SEQLEN = { + "Cohere-embed-english-v3.0": 512, + "Cohere-embed-multilingual-v3.0": 512, + "Cohere-embed-multilingual-light-v3.0": 512, + "all-MiniLM-L12-v2": 512, + "all-MiniLM-L6-v2": 512, + "all-mpnet-base-v2": 514, + "allenai-specter": 512, + "Baichuan-text-embedding": 512, + "bert-base-10lang-cased": 512, + "bert-base-15lang-cased": 512, + "bert-base-25lang-cased": 512, + "bert-base-multilingual-cased": 512, + "bert-base-multilingual-uncased": 512, + "bert-base-swedish-cased": 512, + "bert-base-uncased": 512, + "bge-base-zh-v1.5": 512, + "bge-large-en-v1.5": 512, + "bge-large-zh-v1.5": 512, + "bge-large-zh-noinstruct": 512, + "bge-small-zh-v1.5": 512, + "camembert-base": 512, + "camembert-large": 512, + "contriever-base-msmarco": 512, + "cross-en-de-roberta-sentence-transformer": 514, + "distilbert-base-25lang-cased": 512, + "distilbert-base-en-fr-cased": 512, + "distilbert-base-en-fr-es-pt-it-cased": 512, + "distilbert-base-fr-cased": 512, + "distilbert-base-uncased": 512, + "DanskBERT": 514, + "dfm-encoder-large-v1": 512, + "dfm-sentence-encoder-large-1": 512, + "distiluse-base-multilingual-cased-v2": 512, + "e5-base": 512, + "e5-large": 512, + "e5-mistral-7b-instruct": 32768, + "e5-small": 512, + "electra-small-nordic": 512, + "electra-small-swedish-cased-discriminator": 512, + "flaubert_base_cased": 512, + "flaubert_base_uncased": 512, + "flaubert_large_cased": 512, + "gbert-base": 512, + "gbert-large": 512, + "gelectra-base": 512, + "gelectra-large": 512, + "google-gecko.text-embedding-preview-0409": 2048, + "google-gecko-256.text-embedding-preview-0409": 2048, + "gottbert-base": 512, + "glove.6B.300d": "N/A", + 
"gtr-t5-base": 512, + "gtr-t5-large": 512, + "gtr-t5-xl": 512, + "gtr-t5-xxl": 512, + "herbert-base-retrieval-v2": 514, + "komninos": "N/A", + "luotuo-bert-medium": 512, + "LASER2": "N/A", + "LaBSE": 512, + "m3e-base": 512, + "m3e-large": 512, +# "mistral-embed": "?", + "msmarco-bert-co-condensor": 512, + "multi-qa-MiniLM-L6-cos-v1": 512, + "multilingual-e5-base": 514, + "multilingual-e5-large": 514, + "multilingual-e5-small": 512, + "nb-bert-base": 512, + "nb-bert-large": 512, + "nomic-embed-text-v1.5-64": 8192, + "nomic-embed-text-v1.5-128": 8192, + "nomic-embed-text-v1.5-256": 8192, + "nomic-embed-text-v1.5-512": 8192, + "norbert3-base": 512, + "norbert3-large": 512, + "OpenSearch-text-hybrid": 512, + "paraphrase-multilingual-MiniLM-L12-v2": 512, + "paraphrase-multilingual-mpnet-base-v2": 514, + "sentence-camembert-base": 512, + "sentence-camembert-large": 512, + "sentence-croissant-llm-base": 2048, + "sentence-bert-swedish-cased": 512, + "sentence-t5-base": 512, + "sentence-t5-large": 512, + "sentence-t5-xl": 512, + "sentence-t5-xxl": 512, + "silver-retriever-base-v1": 514, + "sup-simcse-bert-base-uncased": 512, + "st-polish-paraphrase-from-distilroberta": 514, + "st-polish-paraphrase-from-mpnet": 514, + "text2vec-base-chinese": 512, + "text2vec-large-chinese": 512, + "text-embedding-3-large": 8191, + "text-embedding-3-large-256": 8191, + "text-embedding-3-small": 8191, + "text-embedding-ada-002": 8191, + "text-similarity-ada-001": 2046, + "text-similarity-babbage-001": 2046, + "text-similarity-curie-001": 2046, + "text-similarity-davinci-001": 2046, + "text-search-ada-doc-001": 2046, + "text-search-ada-query-001": 2046, + "text-search-ada-001": 2046, + "text-search-babbage-001": 2046, + "text-search-curie-001": 2046, + "text-search-davinci-001": 2046, + "titan-embed-text-v1": 8000, + "udever-bloom-1b1": 2048, + "udever-bloom-560m": 2048, + "universal-sentence-encoder-multilingual-3": 512, + "universal-sentence-encoder-multilingual-large-3": 512, + 
"use-cmlm-multilingual": 512, + "unsup-simcse-bert-base-uncased": 512, + "voyage-2": 1024, + "voyage-code-2": 16000, + "voyage-law-2": 4000, + "voyage-lite-01-instruct": 4000, + "voyage-lite-02-instruct": 4000, + "xlm-roberta-base": 514, + "xlm-roberta-large": 514, +} + +EXTERNAL_MODEL_TO_SIZE = { + "allenai-specter": 110, + "all-MiniLM-L12-v2": 33, + "all-MiniLM-L6-v2": 23, + "all-mpnet-base-v2": 110, + "bert-base-10lang-cased": 138, + "bert-base-15lang-cased": 138, + "bert-base-25lang-cased": 138, + "bert-base-multilingual-cased": 179, + "bert-base-multilingual-uncased": 168, + "bert-base-uncased": 110, + "bert-base-swedish-cased": 125, + "bge-base-zh-v1.5": 102, + "bge-large-zh-v1.5": 326, + "bge-large-zh-noinstruct": 326, + "bge-small-zh-v1.5": 24, + "camembert-base": 111, + "camembert-large": 338, + "cross-en-de-roberta-sentence-transformer": 278, + "contriever-base-msmarco": 110, + "distilbert-base-25lang-cased": 110, + "distilbert-base-en-fr-cased": 110, + "distilbert-base-en-fr-es-pt-it-cased": 110, + "distilbert-base-fr-cased": 110, + "distilbert-base-uncased": 110, + "DanskBERT": 125, + "distiluse-base-multilingual-cased-v2": 135, + "dfm-encoder-large-v1": 355, + "dfm-sentence-encoder-large-1": 355, + "e5-base": 110, + "e5-large": 335, + "e5-mistral-7b-instruct": 7111, + "e5-small": 33, + "electra-small-nordic": 23, + "electra-small-swedish-cased-discriminator": 16, + "flaubert_base_cased": 138, + "flaubert_base_uncased": 138, + "flaubert_large_cased": 372, + "gbert-base": 110, + "gbert-large": 337, + "gelectra-base": 110, + "gelectra-large": 335, + "glove.6B.300d": 120, + "google-gecko.text-embedding-preview-0409": 1200, + "google-gecko-256.text-embedding-preview-0409": 1200, + "gottbert-base": 127, + "gtr-t5-base": 110, + "gtr-t5-large": 168, + "gtr-t5-xl": 1240, + "gtr-t5-xxl": 4865, + "herbert-base-retrieval-v2": 125, + "komninos": 134, + "luotuo-bert-medium": 328, + "LASER2": 43, + "LaBSE": 471, + "m3e-base": 102, + "m3e-large": 102, + 
"msmarco-bert-co-condensor": 110, + "multi-qa-MiniLM-L6-cos-v1": 23, + "multilingual-e5-base": 278, + "multilingual-e5-small": 118, + "multilingual-e5-large": 560, + "nb-bert-base": 179, + "nb-bert-large": 355, + "nomic-embed-text-v1.5-64": 138, + "nomic-embed-text-v1.5-128": 138, + "nomic-embed-text-v1.5-256": 138, + "nomic-embed-text-v1.5-512": 138, + "norbert3-base": 131, + "norbert3-large": 368, + "paraphrase-multilingual-mpnet-base-v2": 278, + "paraphrase-multilingual-MiniLM-L12-v2": 118, + "sentence-camembert-base": 110, + "sentence-camembert-large": 337, + "sentence-croissant-llm-base": 1280, + "sentence-bert-swedish-cased": 125, + "sentence-t5-base": 110, + "sentence-t5-large": 168, + "sentence-t5-xl": 1240, + "sentence-t5-xxl": 4865, + "silver-retriever-base-v1": 125, + "sup-simcse-bert-base-uncased": 110, + "st-polish-paraphrase-from-distilroberta": 125, + "st-polish-paraphrase-from-mpnet": 125, + "text2vec-base-chinese": 102, + "text2vec-large-chinese": 326, + "unsup-simcse-bert-base-uncased": 110, + "use-cmlm-multilingual": 472, + #"voyage-law-2": 1220, + "voyage-lite-02-instruct": 1220, + "xlm-roberta-base": 279, + "xlm-roberta-large": 560, +} PROPRIETARY_MODELS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) + "Cohere-embed-english-v3.0", + "Cohere-embed-multilingual-v3.0", + "Cohere-embed-multilingual-light-v3.0", + "Baichuan-text-embedding", + "mistral-embed", + "OpenSearch-text-hybrid", + "text-embedding-3-small", + "text-embedding-3-large", + "text-embedding-3-large-256", + "text-embedding-ada-002", + "text-similarity-ada-001", + "text-similarity-babbage-001", + "text-similarity-curie-001", + "text-similarity-davinci-001", + "text-search-ada-doc-001", + "text-search-ada-query-001", + "text-search-ada-001", + "text-search-curie-001", + "text-search-babbage-001", + "text-search-davinci-001", + "titan-embed-text-v1", + "voyage-2", + "voyage-code-2", + "voyage-law-2", + 
"voyage-lite-01-instruct", + "voyage-lite-02-instruct", + "google-gecko.text-embedding-preview-0409", + "google-gecko-256.text-embedding-preview-0409", +} +PROPRIETARY_MODELS = { + make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard")) for model in PROPRIETARY_MODELS } + SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) - for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS -} -INSTRUCT_MODELS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) - for model in INSTRUCT_MODELS -} -NOINSTRUCT_MODELS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) - for model in NOINSTRUCT_MODELS + "allenai-specter", + "allenai-specter", + "all-MiniLM-L12-v2", + "all-MiniLM-L6-v2", + "all-mpnet-base-v2", + "bert-base-10lang-cased", + "bert-base-15lang-cased", + "bert-base-25lang-cased", + "bert-base-multilingual-cased", + "bert-base-multilingual-uncased", + "bert-base-swedish-cased", + "bert-base-uncased", + "bge-base-zh-v1.5", + "bge-large-zh-v1.5", + "bge-large-zh-noinstruct", + "bge-small-zh-v1.5", + "camembert-base", + "camembert-large", + "contriever-base-msmarco", + "cross-en-de-roberta-sentence-transformer", + "DanskBERT", + "distilbert-base-25lang-cased", + "distilbert-base-en-fr-cased", + "distilbert-base-en-fr-es-pt-it-cased", + "distilbert-base-fr-cased", + "distilbert-base-uncased", + "distiluse-base-multilingual-cased-v2", + "dfm-encoder-large-v1", + "dfm-sentence-encoder-large-1", + "e5-base", + "e5-large", + "e5-mistral-7b-instruct", + "e5-small", + "electra-small-nordic", + "electra-small-swedish-cased-discriminator", + "flaubert_base_cased", + "flaubert_base_uncased", + "flaubert_large_cased", + "gbert-base", + "gbert-large", + "gelectra-base", + "gelectra-large", + 
"glove.6B.300d", + "gottbert-base", + "gtr-t5-base", + "gtr-t5-large", + "gtr-t5-xl", + "gtr-t5-xxl", + "herbert-base-retrieval-v2", + "komninos", + "luotuo-bert-medium", + "LaBSE", + "m3e-base", + "m3e-large", + "msmarco-bert-co-condensor", + "multi-qa-MiniLM-L6-cos-v1", + "multilingual-e5-base", + "multilingual-e5-large", + "multilingual-e5-small", + "nb-bert-base", + "nb-bert-large", + "nomic-embed-text-v1.5-64", + "nomic-embed-text-v1.5-128", + "nomic-embed-text-v1.5-256", + "nomic-embed-text-v1.5-512", + "norbert3-base", + "norbert3-large", + "paraphrase-multilingual-mpnet-base-v2", + "paraphrase-multilingual-MiniLM-L12-v2", + "sentence-camembert-base", + "sentence-camembert-large", + "sentence-croissant-llm-base", + "sentence-bert-swedish-cased", + "sentence-t5-base", + "sentence-t5-large", + "sentence-t5-xl", + "sentence-t5-xxl", + "silver-retriever-base-v1", + "sup-simcse-bert-base-uncased", + "st-polish-paraphrase-from-distilroberta", + "st-polish-paraphrase-from-mpnet", + "text2vec-base-chinese", + "text2vec-large-chinese", + "udever-bloom-1b1", + "udever-bloom-560m", + "universal-sentence-encoder-multilingual-3", + "universal-sentence-encoder-multilingual-large-3", + "unsup-simcse-bert-base-uncased", + "use-cmlm-multilingual", + "xlm-roberta-base", + "xlm-roberta-large", } -CROSS_ENCODERS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) - for model in CROSS_ENCODERS +SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = { + make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard")) + for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS } -BI_ENCODERS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) - for model in BI_ENCODERS + +MODELS_TO_SKIP = { + "baseplate/instructor-large-1", # Duplicate + "radames/e5-large", # Duplicate + "gentlebowl/instructor-large-safetensors", # 
Duplicate + "Consensus/instructor-base", # Duplicate + "GovCompete/instructor-xl", # Duplicate + "GovCompete/e5-large-v2", # Duplicate + "t12e/instructor-base", # Duplicate + "michaelfeil/ct2fast-e5-large-v2", + "michaelfeil/ct2fast-e5-large", + "michaelfeil/ct2fast-e5-small-v2", + "newsrx/instructor-xl-newsrx", + "newsrx/instructor-large-newsrx", + "fresha/e5-large-v2-endpoint", + "ggrn/e5-small-v2", + "michaelfeil/ct2fast-e5-small", + "jncraton/e5-small-v2-ct2-int8", + "anttip/ct2fast-e5-small-v2-hfie", + "newsrx/instructor-large", + "newsrx/instructor-xl", + "dmlls/all-mpnet-base-v2", + "cgldo/semanticClone", + "Malmuk1/e5-large-v2_Sharded", + "jncraton/gte-small-ct2-int8", + "Einas/einas_ashkar", + "gruber/e5-small-v2-ggml", + "jncraton/bge-small-en-ct2-int8", + "vectoriseai/bge-small-en", + "recipe/embeddings", + "dhairya0907/thenlper-get-large", + "Narsil/bge-base-en", + "kozistr/fused-large-en", + "sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1 + "sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1 + "BAAI/bge-large-en", # Deprecated in favor of v1.5 + "BAAI/bge-base-en", # Deprecated in favor of v1.5 + "BAAI/bge-small-en", # Deprecated in favor of v1.5 + "d0rj/e5-large-en-ru", + "d0rj/e5-base-en-ru", + "d0rj/e5-small-en-ru", + "aident-ai/bge-base-en-onnx", + "barisaydin/bge-base-en", + "barisaydin/gte-large", + "barisaydin/gte-base", + "barisaydin/gte-small", + "barisaydin/bge-small-en", + "odunola/e5-base-v2", + "goldenrooster/multilingual-e5-large", + "davidpeer/gte-small", + "barisaydin/bge-large-en", + "jamesgpt1/english-large-v1", + "vectoriseai/bge-large-en-v1.5", + "vectoriseai/bge-base-en-v1.5", + "vectoriseai/instructor-large", + "vectoriseai/instructor-base", + "vectoriseai/gte-large", + "vectoriseai/gte-base", + "vectoriseai/e5-large-v2", + "vectoriseai/bge-small-en-v1.5", + "vectoriseai/e5-base-v2", + "vectoriseai/e5-large", + 
"vectoriseai/multilingual-e5-large", + "vectoriseai/gte-small", + "vectoriseai/ember-v1", + "vectoriseai/e5-base", + "vectoriseai/e5-small-v2", + "michaelfeil/ct2fast-bge-large-en-v1.5", + "michaelfeil/ct2fast-bge-large-en-v1.5", + "michaelfeil/ct2fast-bge-base-en-v1.5", + "michaelfeil/ct2fast-gte-large", + "michaelfeil/ct2fast-gte-base", + "michaelfeil/ct2fast-bge-small-en-v1.5", + "rizki/bgr-tf", + "ef-zulla/e5-multi-sml-torch", + "cherubhao/yogamodel", + "morgendigital/multilingual-e5-large-quantized", + "jncraton/gte-tiny-ct2-int8", + "Research2NLP/electrical_stella", + "Intel/bge-base-en-v1.5-sts-int8-static", + "Intel/bge-base-en-v1.5-sts-int8-dynamic", + "Intel/bge-base-en-v1.5-sst2", + "Intel/bge-base-en-v1.5-sst2-int8-static", + "Intel/bge-base-en-v1.5-sst2-int8-dynamic", + "Intel/bge-small-en-v1.5-sst2", + "Intel/bge-small-en-v1.5-sst2-int8-dynamic", + "Intel/bge-small-en-v1.5-sst2-int8-static", + "binqiangliu/EmbeddingModlebgelargeENv1.5", + "DecisionOptimizationSystem/DeepFeatEmbeddingLargeContext", + "woody72/multilingual-e5-base", + "Severian/embed", + "Frazic/udever-bloom-3b-sentence", + "jamesgpt1/zzz", + "karrar-alwaili/UAE-Large-V1", + "odunola/UAE-Large-VI", + "shubham-bgi/UAE-Large", + "retrainai/instructor-xl", + "weakit-v/bge-base-en-v1.5-onnx", + "ieasybooks/multilingual-e5-large-onnx", + "gizmo-ai/Cohere-embed-multilingual-v3.0", + "jingyeom/korean_embedding_model", + "barisaydin/text2vec-base-multilingual", + "mlx-community/multilingual-e5-large-mlx", + "mlx-community/multilingual-e5-base-mlx", + "mlx-community/multilingual-e5-small-mlx", + "maiyad/multilingual-e5-small", + "khoa-klaytn/bge-base-en-v1.5-angle", + "khoa-klaytn/bge-small-en-v1.5-angle", + "mixamrepijey/instructor-small", + "mixamrepijey/instructor-models", + "lsf1000/bge-evaluation", # Empty + "giulio98/placeholder", # Empty + "Severian/nomic", # Copy + "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy + "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy + 
"TitanML/jina-v2-base-en-embed", # Copy + "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant + "Geolumina/instructor-xl", # Duplicate + "krilecy/e5-mistral-7b-instruct", + "beademiguelperez/sentence-transformers-multilingual-e5-small", + "arcdev/SFR-Embedding-Mistral", + "arcdev/e5-mistral-7b-instruct", + "Koat/gte-tiny", + "SmartComponents/bge-micro-v2", } +def add_lang(examples): + if not(examples["eval_language"]): + examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"] + else: + examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"] + f' ({examples["eval_language"]})' + return examples + +def norm(names): return set([name.split(" ")[0] for name in names]) + +def add_task(examples): + # Could be added to the dataset loading script instead + if examples["mteb_dataset_name"] in norm(TASK_LIST_CLASSIFICATION + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH): + examples["mteb_task"] = "Classification" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_FR + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH): + examples["mteb_task"] = "Clustering" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH): + examples["mteb_task"] = "PairClassification" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH): + examples["mteb_task"] = "Reranking" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_RETRIEVAL_LAW): + examples["mteb_task"] = "Retrieval" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + 
TASK_LIST_STS_ZH): + examples["mteb_task"] = "STS" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_SUMMARIZATION + TASK_LIST_SUMMARIZATION_FR): + examples["mteb_task"] = "Summarization" + elif examples["mteb_dataset_name"] in norm(TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA): + examples["mteb_task"] = "BitextMining" + else: + print("WARNING: Task not found for dataset", examples["mteb_dataset_name"]) + examples["mteb_task"] = "Unknown" + return examples + +if os.path.exists("EXTERNAL_MODEL_RESULTS.json"): + with open("EXTERNAL_MODEL_RESULTS.json") as f: + EXTERNAL_MODEL_RESULTS = json.load(f) + # Update with models not contained + models_to_run = [] + for model in EXTERNAL_MODELS: + if model not in EXTERNAL_MODEL_RESULTS: + models_to_run.append(model) + EXTERNAL_MODEL_RESULTS[model] = {k: {v: []} for k, v in TASK_TO_METRIC.items()} +else: + EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS} + models_to_run = EXTERNAL_MODELS + +pbar = tqdm(models_to_run, desc="Fetching external model results") +for model in pbar: + pbar.set_description(f"Fetching external model results for {model!r}") + ds = load_dataset("mteb/results", model, trust_remote_code=True) + # For local debugging: + #, download_mode='force_redownload', verification_mode="no_checks") + ds = ds.map(add_lang) + ds = ds.map(add_task) + base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))} + # For now only one metric per task - Could add more metrics lateron + for task, metric in TASK_TO_METRIC.items(): + ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict() + ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])} + EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict}) + +# Save & cache EXTERNAL_MODEL_RESULTS +with 
open("EXTERNAL_MODEL_RESULTS.json", "w") as f: + json.dump(EXTERNAL_MODEL_RESULTS, f) + +def get_dim_seq_size(model): + filenames = [sib.rfilename for sib in model.siblings] + dim, seq = "", "" + for filename in filenames: + if re.match("\d+_Pooling/config.json", filename): + st_config_path = hf_hub_download(model.modelId, filename=filename) + dim = json.load(open(st_config_path)).get("word_embedding_dimension", "") + break + for filename in filenames: + if re.match("\d+_Dense/config.json", filename): + st_config_path = hf_hub_download(model.modelId, filename=filename) + dim = json.load(open(st_config_path)).get("out_features", dim) + if "config.json" in filenames: + config_path = hf_hub_download(model.modelId, filename="config.json") + config = json.load(open(config_path)) + if not dim: + dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", ""))) + seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", "")))) + # Get model file size without downloading. 
Parameters in million parameters and memory in GB + parameters, memory = get_model_parameters_memory(model) + return dim, seq, parameters, memory + def make_datasets_clickable(df): """Does not work""" if "BornholmBitextMining" in df.columns: @@ -43,6 +1266,405 @@ def make_datasets_clickable(df): columns={f'BornholmBitextMining': 'BornholmBitextMining',}) return df +def add_rank(df): + cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens"]] + if len(cols_to_rank) == 1: + df.sort_values(cols_to_rank[0], ascending=False, inplace=True) + else: + df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False)) + df.sort_values("Average", ascending=False, inplace=True) + df.insert(0, "Rank", list(range(1, len(df) + 1))) + df = df.round(2) + # Fill NaN after averaging + df.fillna("", inplace=True) + return df + +def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True): + api = HfApi() + models = api.list_models(filter="mteb") + # Initialize list to models that we cannot fetch metadata from + df_list = [] + for model in EXTERNAL_MODEL_RESULTS: + results_list = [res for task in tasks for res in EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]] + if len(datasets) > 0: + res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])} + elif langs: + # Would be cleaner to rely on an extra language column instead + langs_format = [f"({lang})" for lang in langs] + res = {k: v for d in results_list for k, v in d.items() if any([k.split(" ")[-1] in (k, x) for x in langs_format])} + else: + res = {k: v for d in results_list for k, v in d.items()} + # Model & at least one result + if len(res) > 1: + if add_emb_dim: + res["Model Size (Million Parameters)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "") + 
res["Memory Usage (GB, fp32)"] = round(res["Model Size (Million Parameters)"] * 1e6 * 4 / 1024**3, 2) if res["Model Size (Million Parameters)"] != "" else "" + res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "") + res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "") + df_list.append(res) + + for model in models: + if model.modelId in MODELS_TO_SKIP: continue + print("MODEL", model) + readme_path = hf_hub_download(model.modelId, filename="README.md") + meta = metadata_load(readme_path) + if "model-index" not in meta: + continue + # meta['model-index'][0]["results"] is list of elements like: + # { + # "task": {"type": "Classification"}, + # "dataset": { + # "type": "mteb/amazon_massive_intent", + # "name": "MTEB MassiveIntentClassification (nb)", + # "config": "nb", + # "split": "test", + # }, + # "metrics": [ + # {"type": "accuracy", "value": 39.81506388702084}, + # {"type": "f1", "value": 38.809586587791664}, + # ], + # }, + # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out + if len(datasets) > 0: + task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])] + elif langs: + task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))] + else: + task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks)] + out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results] + out = {k: v for d in out for k, v in d.items()} + out["Model"] = make_clickable_model(model.modelId) + # Model & at least one result + if len(out) > 1: + if 
add_emb_dim: + try: + # Fails on gated repos, so we only include scores for them + out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (Million Parameters)"], out["Memory Usage (GB, fp32)"] = get_dim_seq_size(model) + except: + pass + df_list.append(out) + if model.library_name == "sentence-transformers" or "sentence-transformers" in model.tags or "modules.json" in {file.rfilename for file in model.siblings}: + SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS.add(out["Model"]) + df = pd.DataFrame(df_list) + # If there are any models that are the same, merge them + # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one + df = df.groupby("Model", as_index=False).first() + # Put 'Model' column first + cols = sorted(list(df.columns)) + cols.insert(0, cols.pop(cols.index("Model"))) + df = df[cols] + if rank: + df = add_rank(df) + if fillna: + df.fillna("", inplace=True) + return df + +def get_mteb_average(): + global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION + DATA_OVERALL = get_mteb_data( + tasks=[ + "Classification", + "Clustering", + "PairClassification", + "Reranking", + "Retrieval", + "STS", + "Summarization", + ], + datasets=TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION, + fillna=False, + add_emb_dim=True, + rank=False, + ) + # Debugging: + # DATA_OVERALL.to_csv("overall.csv") + + DATA_OVERALL.insert(1, f"Average ({len(TASK_LIST_EN)} datasets)", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False)) + DATA_OVERALL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False)) + DATA_OVERALL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", 
                      DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(7, f"STS Average ({len(TASK_LIST_STS)} datasets)", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
    DATA_OVERALL.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
    DATA_OVERALL.sort_values(f"Average ({len(TASK_LIST_EN)} datasets)", ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))

    DATA_OVERALL = DATA_OVERALL.round(2)

    # Per-task subtables: model + size/memory columns plus that task's dataset scores.
    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION])
    # Only keep rows with at least one score in addition to the "Model" & rank column
    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 4:].ne("").any(axis=1)]

    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING])
    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 4:].ne("").any(axis=1)]

    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION])
    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 4:].ne("").any(axis=1)]

    DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING])
    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 4:].ne("").any(axis=1)]

    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL])
    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 4:].ne("").any(axis=1)]

    DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS])
    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 4:].ne("").any(axis=1)]

    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_SUMMARIZATION])
    # NOTE(review): summarization filters from column 1 while the others filter
    # from column 4 — presumably intentional for its single-dataset table; confirm.
    DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]

    # Fill NaN after averaging
    DATA_OVERALL.fillna("", inplace=True)

    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
    DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL

def get_mteb_average_zh():
    # Chinese counterpart of get_mteb_average (no Summarization task for zh).
    global DATA_OVERALL_ZH, DATA_CLASSIFICATION_ZH, DATA_CLUSTERING_ZH, DATA_PAIR_CLASSIFICATION_ZH, DATA_RERANKING_ZH, DATA_RETRIEVAL_ZH, DATA_STS_ZH
    DATA_OVERALL_ZH = get_mteb_data(
        tasks=[
            "Classification",
            "Clustering",
            "PairClassification",
            "Reranking",
            "Retrieval",
            "STS",
        ],
        datasets=TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH,
        fillna=False,
        add_emb_dim=True,
        rank=False,
    )
    # Debugging:
    # DATA_OVERALL_ZH.to_csv("overall.csv")

    DATA_OVERALL_ZH.insert(1, f"Average ({len(TASK_LIST_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_CLASSIFICATION_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_CLUSTERING_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_PAIR_CLASSIFICATION_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_RERANKING_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_RETRIEVAL_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.insert(7, f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_STS_ZH].mean(axis=1, skipna=False))
    DATA_OVERALL_ZH.sort_values(f"Average ({len(TASK_LIST_ZH)} datasets)", ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL_ZH.insert(0, "Rank", list(range(1, len(DATA_OVERALL_ZH) + 1)))

    DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)

    DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION_ZH])
    # Only keep rows with at least one score in addition to the "Model" & rank column
    DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 4:].ne("").any(axis=1)]

    DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING_ZH])
    DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 4:].ne("").any(axis=1)]

    DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
    DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 4:].ne("").any(axis=1)]

    DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING_ZH])
    DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 4:].ne("").any(axis=1)]

    DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL_ZH])
    DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 4:].ne("").any(axis=1)]

    DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS_ZH])
    DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 4:].ne("").any(axis=1)]

    # Fill NaN after averaging
    DATA_OVERALL_ZH.fillna("", inplace=True)

    DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
    DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL_ZH

def get_mteb_average_fr():
    # French counterpart; body continues past this view.
    global DATA_OVERALL_FR,
DATA_CLASSIFICATION_FR, DATA_CLUSTERING_FR, DATA_PAIR_CLASSIFICATION_FR, DATA_RERANKING_FR, DATA_RETRIEVAL_FR, DATA_STS_FR, DATA_SUMMARIZATION_FR + DATA_OVERALL_FR = get_mteb_data( + tasks=[ + "Classification", + "Clustering", + "PairClassification", + "Reranking", + "Retrieval", + "STS", + "Summarization" + ], + datasets=TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR, + fillna=False, + add_emb_dim=True, + rank=False, + ) + # Debugging: + # DATA_OVERALL_FR.to_csv("overall.csv") + + DATA_OVERALL_FR.insert(1, f"Average ({len(TASK_LIST_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLASSIFICATION_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLUSTERING_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_PAIR_CLASSIFICATION_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RERANKING_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RETRIEVAL_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(7, f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_STS_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)", DATA_OVERALL_FR[TASK_LIST_SUMMARIZATION_FR].mean(axis=1, skipna=False)) + DATA_OVERALL_FR.sort_values(f"Average ({len(TASK_LIST_FR)} datasets)", ascending=False, 
inplace=True) + # Start ranking from 1 + DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1))) + DATA_OVERALL_FR = DATA_OVERALL_FR.round(2) + + DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION_FR]) + DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 4:].ne("").any(axis=1)] + + DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING_FR]) + DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 4:].ne("").any(axis=1)] + + DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION_FR]) + DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 4:].ne("").any(axis=1)] + + DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING_FR]) + DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 4:].ne("").any(axis=1)] + + DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL_FR]) + DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 4:].ne("").any(axis=1)] + + DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS_FR]) + DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 4:].ne("").any(axis=1)] + + DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_SUMMARIZATION_FR]) + DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)] + + # Fill NaN after averaging + DATA_OVERALL_FR.fillna("", inplace=True) + + DATA_OVERALL_FR = 
DATA_OVERALL_FR[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]] + DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)] + + return DATA_OVERALL_FR + +def get_mteb_average_pl(): + global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL + DATA_OVERALL_PL = get_mteb_data( + tasks=[ + "Classification", + "Clustering", + "PairClassification", + "Retrieval", + "STS", + ], + datasets=TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL, + fillna=False, + add_emb_dim=True, + rank=False, + ) + # Debugging: + # DATA_OVERALL_PL.to_csv("overall.csv") + + DATA_OVERALL_PL.insert(1, f"Average ({len(TASK_LIST_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PL].mean(axis=1, skipna=False)) + DATA_OVERALL_PL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLASSIFICATION_PL].mean(axis=1, skipna=False)) + DATA_OVERALL_PL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLUSTERING_PL].mean(axis=1, skipna=False)) + DATA_OVERALL_PL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PAIR_CLASSIFICATION_PL].mean(axis=1, skipna=False)) + DATA_OVERALL_PL.insert(5, 
f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_RETRIEVAL_PL].mean(axis=1, skipna=False)) + DATA_OVERALL_PL.insert(6, f"STS Average ({len(TASK_LIST_STS_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_STS_PL].mean(axis=1, skipna=False)) + DATA_OVERALL_PL.sort_values(f"Average ({len(TASK_LIST_PL)} datasets)", ascending=False, inplace=True) + # Start ranking from 1 + DATA_OVERALL_PL.insert(0, "Rank", list(range(1, len(DATA_OVERALL_PL) + 1))) + + DATA_OVERALL_PL = DATA_OVERALL_PL.round(2) + + DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION_PL]) + # Only keep rows with at least one score in addition to the "Model" & rank column + DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 4:].ne("").any(axis=1)] + + DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING_PL]) + DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 4:].ne("").any(axis=1)] + + DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION_PL]) + DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 4:].ne("").any(axis=1)] + + DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL_PL]) + DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 4:].ne("").any(axis=1)] + + DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS_PL]) + DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 4:].ne("").any(axis=1)] + + # Fill NaN after averaging + DATA_OVERALL_PL.fillna("", inplace=True) + + DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size 
(Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]] + DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)] + + return DATA_OVERALL_PL + +get_mteb_average() +get_mteb_average_fr() +get_mteb_average_pl() +get_mteb_average_zh() +DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_BITEXT_MINING] +DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_BITEXT_MINING_DA] +DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_DA] +DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_NB] +DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_SV] +DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_OTHER] +DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], 
TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLUSTERING_DE] +DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_STS_OTHER] +DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_LAW] + +# Exact, add all non-nan integer values for every dataset +NUM_SCORES = 0 +DATASETS = [] +MODELS = [] +# LANGUAGES = [] +for d in [ + DATA_BITEXT_MINING, + DATA_BITEXT_MINING_DA, + DATA_CLASSIFICATION_EN, + DATA_CLASSIFICATION_DA, + DATA_CLASSIFICATION_FR, + DATA_CLASSIFICATION_NB, + DATA_CLASSIFICATION_PL, + DATA_CLASSIFICATION_SV, + DATA_CLASSIFICATION_ZH, + DATA_CLASSIFICATION_OTHER, + DATA_CLUSTERING, + DATA_CLUSTERING_DE, + DATA_CLUSTERING_FR, + DATA_CLUSTERING_PL, + DATA_CLUSTERING_ZH, + DATA_PAIR_CLASSIFICATION, + DATA_PAIR_CLASSIFICATION_FR, + DATA_PAIR_CLASSIFICATION_PL, + DATA_PAIR_CLASSIFICATION_ZH, + DATA_RERANKING, + DATA_RERANKING_FR, + DATA_RERANKING_ZH, + DATA_RETRIEVAL, + DATA_RETRIEVAL_FR, + DATA_RETRIEVAL_PL, + DATA_RETRIEVAL_ZH, + DATA_RETRIEVAL_LAW, + DATA_STS_EN, + DATA_STS_FR, + DATA_STS_PL, + DATA_STS_ZH, + DATA_STS_OTHER, + DATA_SUMMARIZATION, + DATA_SUMMARIZATION_FR, +]: + # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum() + cols_to_ignore = 4 if "Average" in d.columns else 3 + # Count number of scores including only non-nan floats & excluding the rank column + NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum() + # Exclude rank & model name column (first two); Do not count different language versions as different datasets + DATASETS += [i.split(" ")[0] for i in d.columns[cols_to_ignore:]] + # LANGUAGES += [i.split(" ")[-1] for i in 
d.columns[cols_to_ignore:]] + MODELS += d["Model"].tolist() + +NUM_DATASETS = len(set(DATASETS)) +# NUM_LANGUAGES = len(set(LANGUAGES)) +NUM_MODELS = len(set(MODELS)) # 1. Force headers to wrap # 2. Force model column (maximum) width @@ -72,98 +1694,323 @@ Each inner tab can have the following keys: - language_long: [optional] The long form of the language - description: The description of the leaderboard - credits: [optional] The credits for the leaderboard -- desc: [optional] The description of the leaderboard - data: The data for the leaderboard +- refresh: The function to refresh the leaderboard """ -# No more refreshing manually, happens daily -# def get_refresh_function(task_category, task_list): -# def _refresh(): -# data_task_category = get_mteb_data(tasks=[task_category], datasets=task_list) -# data_task_category.drop(columns=["Embedding Dimensions", "Max Tokens"], inplace=True) -# return data_task_category -# return _refresh - - -# def get_refresh_overall_function(tasks): -# return lambda: get_mteb_average(tasks)[0] - - -# load in the pre-calculated `all_data_tasks` and `boards_data` -print(f"Loading pre-calculated data....") -all_data_tasks = load_results("all_data_tasks") -boards_data = load_results("boards_data") - -#### Caclulate Metadata -# Exact, add all non-nan integer values for every dataset -NUM_SCORES = 0 -DATASETS = [] -MODELS = [] -# LANGUAGES = [] -for d in all_data_tasks: - if isinstance(d, list) and len(d) == 0: - continue - # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum() - cols_to_ignore = 4 if "Average" in d.columns else 3 - # Count number of scores including only non-nan floats & excluding the rank column - NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum() - # Exclude rank & model name column (first two); Do not count different language versions as different datasets - DATASETS += [i.split(" ")[0] for i in d.columns[cols_to_ignore:]] - # LANGUAGES += 
[i.split(" ")[-1] for i in d.columns[cols_to_ignore:]] - MODELS += d["Model"].tolist() - - -NUM_DATASETS = len(set(DATASETS)) -# NUM_LANGUAGES = len(set(LANGUAGES)) -NUM_MODELS = len(set(MODELS)) +chinese_credits = "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)" +french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [Wissam Siblini](https://github.com/wissam-sib), [Mathieu Ciancone](https://github.com/MathieuCiancone), [Marion Schaeffer](https://github.com/schmarion)" +danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)" +norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)" +polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)" data = { - "Overall": {"metric": "Various, refer to task tabs", "data": []} + "Overall": { + "metric": "Various, refer to task tabs", + "data": [ + { + "language": "English", + "description": "**Overall MTEB English leaderboard** 🔮", + "data": DATA_OVERALL, + "refresh": get_mteb_average, + }, + { + "language": "Chinese", + "data": DATA_OVERALL_ZH, + "description": "**Overall MTEB Chinese leaderboard (C-MTEB)** 🔮🇨🇳", + "credits": chinese_credits, + "refresh": get_mteb_average_zh, + }, + { + "language": "French", + "data": DATA_OVERALL_FR, + "description": "**Overall MTEB French leaderboard (F-MTEB)** 🔮🇫🇷", + "credits": french_credits, + "refresh": get_mteb_average_fr, + }, + { + "language": "Polish", + "data": DATA_OVERALL_PL, + "description": "**Overall MTEB Polish leaderboard** 🔮🇵🇱", + "refresh": get_mteb_average_pl, + }, + ] + }, + "Bitext Mining": { + "metric": "[F1](https://huggingface.co/spaces/evaluate-metric/f1)", + "data": [ + { + 
"language": "English-X", + "language_long": "117 (Pairs of: English & other language)", + "description": "**Bitext Mining English-X Leaderboard** 🎌", + "data": DATA_BITEXT_MINING, + "refresh": partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING), + }, + { + "language": "Danish", + "language_long": "Danish & Bornholmsk (Danish Dialect)", + "description": "**Bitext Mining Danish Leaderboard** 🎌🇩🇰", + "credits": danish_credits, + "data": DATA_BITEXT_MINING_DA, + "refresh": partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING_DA), + } + ] + }, + "Classification": { + "metric": "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)", + "data": [ + { + "language": "English", + "description": "**Classification English Leaderboard** ❤️", + "data": DATA_CLASSIFICATION_EN, + "refresh": partial(get_mteb_data, tasks=["Classification"], langs=["en"]) + }, + { + "language": "Chinese", + "description": "**Classification Chinese Leaderboard** 🧡🇨🇳", + "credits": chinese_credits, + "data": DATA_CLASSIFICATION_ZH, + "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_ZH) + }, + { + "language": "Danish", + "description": "**Classification Danish Leaderboard** 🤍🇩🇰", + "credits": danish_credits, + "data": DATA_CLASSIFICATION_DA, + "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA) + }, + { + "language": "French", + "description": "**Classification French Leaderboard** 💙🇫🇷", + "credits": french_credits, + "data": DATA_CLASSIFICATION_FR, + "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_FR) + }, + { + "language": "Norwegian", + "language_long": "Norwegian Bokmål", + "description": "**Classification Norwegian Leaderboard** 💙🇳🇴", + "credits": norwegian_credits, + "data": DATA_CLASSIFICATION_NB, + "refresh": partial(get_mteb_data, tasks=["Classification"], 
datasets=TASK_LIST_CLASSIFICATION_NB) + }, + { + "language": "Polish", + "description": "**Classification Polish Leaderboard** 🤍🇵🇱", + "credits": polish_credits, + "data": DATA_CLASSIFICATION_PL, + "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_PL) + }, + { + "language": "Swedish", + "description": "**Classification Swedish Leaderboard** 💛🇸🇪", + "credits": norwegian_credits, + "data": DATA_CLASSIFICATION_SV, + "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_SV) + }, + { + "language": "Other", + "language_long": "47 (Only languages not included in the other tabs)", + "description": "**Classification Other Languages Leaderboard** 💜💚💙", + "data": DATA_CLASSIFICATION_OTHER, + "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_OTHER) + } + ] + }, + "Clustering": { + "metric": "Validity Measure (v_measure)", + "data": [ + { + "language": "English", + "description": "**Clustering Leaderboard** ✨", + "data": DATA_CLUSTERING, + "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING) + }, + { + "language": "Chinese", + "description": "**Clustering Chinese Leaderboard** ✨🇨🇳", + "credits": chinese_credits, + "data": DATA_CLUSTERING_ZH, + "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH) + }, + { + "language": "French", + "description": "**Clustering French Leaderboard** ✨🇫🇷", + "credits": french_credits, + "data": DATA_CLUSTERING_FR, + "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_FR) + }, + { + "language": "German", + "description": "**Clustering German Leaderboard** ✨🇩🇪", + "credits": "[Silvan](https://github.com/slvnwhrl)", + "data": DATA_CLUSTERING_DE, + "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_DE) + }, + { + "language": "Polish", + "description": "**Clustering Polish Leaderboard** 
✨🇵🇱", + "credits": polish_credits, + "data": DATA_CLUSTERING_PL, + "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_PL) + }, + ] + }, + "Pair Classification": { + "metric": "Average Precision based on Cosine Similarities (cos_sim_ap)", + "data": [ + { + "language": "English", + "description": "**Pair Classification English Leaderboard** 🎭", + "data": DATA_PAIR_CLASSIFICATION, + "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION) + }, + { + "language": "Chinese", + "description": "**Pair Classification Chinese Leaderboard** 🎭🇨🇳", + "credits": chinese_credits, + "data": DATA_PAIR_CLASSIFICATION_ZH, + "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH) + }, + { + "language": "French", + "description": "**Pair Classification French Leaderboard** 🎭🇫🇷", + "credits": french_credits, + "data": DATA_PAIR_CLASSIFICATION_FR, + "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_FR) + }, + { + "language": "Polish", + "description": "**Pair Classification Polish Leaderboard** 🎭🇵🇱", + "credits": polish_credits, + "data": DATA_PAIR_CLASSIFICATION_PL, + "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_PL) + }, + ] + }, + "Reranking": { + "metric": "Mean Average Precision (MAP)", + "data": [ + { + "language": "English", + "description": "**Reranking English Leaderboard** 🥈", + "data": DATA_RERANKING, + "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING) + }, + { + "language": "Chinese", + "description": "**Reranking Chinese Leaderboard** 🥈🇨🇳", + "credits": chinese_credits, + "data": DATA_RERANKING_ZH, + "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH) + }, + { + "language": "French", + "description": "**Reranking French Leaderboard** 🥈🇫🇷", + "credits": 
french_credits, + "data": DATA_RERANKING_FR, + "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_FR) + } + ] + }, + "Retrieval": { + "metric": "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)", + "data": [ + { + "language": "English", + "description": "**Retrieval English Leaderboard** 🔎", + "data": DATA_RETRIEVAL, + "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL) + }, + { + "language": "Chinese", + "description": "**Retrieval Chinese Leaderboard** 🔎🇨🇳", + "credits": chinese_credits, + "data": DATA_RETRIEVAL_ZH, + "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH) + }, + { + "language": "French", + "description": "**Retrieval French Leaderboard** 🔎🇫🇷", + "credits": french_credits, + "data": DATA_RETRIEVAL_FR, + "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR) + }, + { + "language": "Law", + "language_long": "English, German, Chinese", + "description": "**Retrieval Law Leaderboard** 🔎⚖️", + "credits": "[Voyage AI](https://www.voyageai.com/)", + "data": DATA_RETRIEVAL_LAW, + "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_LAW) + }, + { + "language": "Polish", + "description": "**Retrieval Polish Leaderboard** 🔎🇵🇱", + "credits": polish_credits, + "data": DATA_RETRIEVAL_PL, + "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_PL) + } + ] + }, + "STS": { + "metric": "Spearman correlation based on cosine similarity", + "data": [ + { + "language": "English", + "description": "**STS English Leaderboard** 🤖", + "data": DATA_STS_EN, + "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS) + }, + { + "language": "Chinese", + "description": "**STS Chinese Leaderboard** 🤖🇨🇳", + "credits": chinese_credits, + "data": DATA_STS_ZH, + "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH) + }, + { + "language": "French", + 
"description": "**STS French Leaderboard** 🤖🇫🇷", + "credits": french_credits, + "data": DATA_STS_FR, + "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_FR) + }, + { + "language": "Polish", + "description": "**STS Polish Leaderboard** 🤖🇵🇱", + "credits": polish_credits, + "data": DATA_STS_PL, + "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_PL) + }, + { + "language": "Other", + "language_long": "Arabic, Chinese, Dutch, English, French, German, Italian, Korean, Polish, Russian, Spanish (Only language combos not included in the other tabs)", + "description": "**STS Other Leaderboard** 👽", + "data": DATA_STS_OTHER, + "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_OTHER) + }, + ] + }, + "Summarization": { + "metric": "Spearman correlation based on cosine similarity", + "data": [ + { + "language": "English", + "description": "**Summarization Leaderboard** 📜", + "data": DATA_SUMMARIZATION, + "refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION) + }, + { + "language": "French", + "description": "**Summarization Leaderboard** 📜", + "credits": french_credits, + "data": DATA_SUMMARIZATION_FR, + "refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION_FR) + } + ] + } } -for task in TASKS: - data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []} - -for board, board_config in BOARDS_CONFIG.items(): - init_name = board_config["title"] - if init_name in PRETTY_NAMES: - init_name = PRETTY_NAMES[init_name] - board_pretty_name = f"{init_name} leaderboard" - acronym = board_config.get("acronym", None) - board_icon = board_config.get("icon", None) - if board_icon is None: - board_icon = "" - credits = board_config.get("credits", None) - metric = board_config.get("metric", None) - desc = board_config.get("desc", None) - - if board_config["has_overall"]: - overall_pretty_name = board_pretty_name - if acronym is not None: - overall_pretty_name += f" ({board_config['acronym']})" 
- data["Overall"]["data"].append({ - "language": board_config["title"], - "language_long": board_config["language_long"], - "description": f"**Overall MTEB {overall_pretty_name}** 🔮{board_icon}", - "data": boards_data[board]["data_overall"], - # "refresh": get_refresh_overall_function(board_config["tasks"]), - "credits": credits, - "metric": metric, - "desc": desc, - }) - for task_category, task_category_list in board_config["tasks"].items(): - task_icon = TASKS_CONFIG[task_category]['icon'] - if "special_icons" in board_config and isinstance(board_config["special_icons"], dict): - task_icon = board_config["special_icons"].get(task_category, task_icon) - data[task_category]["data"].append({ - "language": board_config["title"], - "language_long": board_config["language_long"], - "description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}", - "data": boards_data[board]["data_tasks"][task_category], - # "refresh": get_refresh_function(task_category, task_category_list), - "credits": credits, - "metric": metric, - "desc": desc, - }) dataframes = [] full_dataframes = [] @@ -189,11 +2036,7 @@ function(goalUrlObject) { def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict): current_task_language["task"] = event.target.id # Either use the cached language for this task or the 1st language - try: - current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[1].children[0].id) - except Exception as e: # is Overall tab, no description - current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[0].children[0].id) - + current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[0].children[0].id) return current_task_language, language_per_task def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict): @@ -215,10 +2058,6 @@ MODEL_TYPES = [ "Open", "Proprietary", 
"Sentence Transformers", - "Cross-Encoders", - "Bi-Encoders", - "Uses Instructions", - "No Instructions", ] def filter_data(search_query, model_types, model_sizes, *full_dataframes): @@ -229,7 +2068,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes): names = df["Model"].map(lambda x: re.match("(.+)", x).group(1)) masks = [] for query in search_query.split(";"): - masks.append(names.str.lower().str.contains(query.lower())) + masks.append(names.str.contains(query)) df = df[reduce(lambda a, b: a | b, masks)] # Apply the model type filtering @@ -242,14 +2081,6 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes): masks.append(df["Model"].isin(PROPRIETARY_MODELS)) elif model_type == "Sentence Transformers": masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS)) - elif model_type == "Cross-Encoders": - masks.append(df["Model"].isin(CROSS_ENCODERS)) - elif model_type == "Bi-Encoders": - masks.append(df["Model"].isin(BI_ENCODERS)) - elif model_type == "Uses Instructions": - masks.append(df["Model"].isin(INSTRUCT_MODELS)) - elif model_type == "No Instructions": - masks.append(df["Model"].isin(NOINSTRUCT_MODELS)) if masks: df = df[reduce(lambda a, b: a | b, masks)] else: @@ -261,10 +2092,10 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes): sizes = df["Model Size (Million Parameters)"].replace('', 0) mask = sizes.apply(lambda size: any(numeric_interval.contains(size))) df = df[mask] + output_dataframes.append(df) return output_dataframes - with gr.Blocks(css=css) as block: # Store the current task and language for updating the URL. This is a bit hacky, but it works @@ -273,7 +2104,7 @@ with gr.Blocks(css=css) as block: language_per_task = gr.JSON(value=dict(), visible=False) gr.Markdown(f""" - Massive Text Embedding Benchmark (MTEB) Leaderboard. 
To submit, refer to the MTEB GitHub repository 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models. Also check out [MTEB Arena](https://huggingface.co/spaces/mteb/arena) ⚔️ + Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the MTEB GitHub repository 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models. """) with gr.Row(): @@ -286,8 +2117,7 @@ with gr.Blocks(css=css) as block: choices=MODEL_TYPES, value=MODEL_TYPES, interactive=True, - elem_classes=["filter-checkbox-group"], - scale=3, + elem_classes=["filter-checkbox-group"] ) filter_model_sizes = gr.CheckboxGroup( label="Model sizes (in number of parameters)", @@ -301,17 +2131,16 @@ with gr.Blocks(css=css) as block: with gr.Tabs() as outer_tabs: # Store the tabs for updating them on load based on URL parameters tabs.append(outer_tabs) + for task, task_values in data.items(): metric = task_values["metric"] task_tab_id = task.lower().replace(" ", "-") # Overall, Bitext Mining, Classification, etc. 
- pretty_task_name = task if task not in PRETTY_NAMES.keys() else PRETTY_NAMES[task] - with gr.Tab(pretty_task_name, id=task_tab_id) as task_tab: + with gr.Tab(task, id=task_tab_id) as task_tab: # For updating the 'task' in the URL task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params) - if "Overall" != task: - gr.Markdown(TASK_DESCRIPTIONS[task]) + with gr.Tabs() as task_tabs: # Store the task tabs for updating them on load based on URL parameters tabs.append(task_tabs) @@ -324,18 +2153,13 @@ with gr.Blocks(css=css) as block: # For updating the 'language' in the URL item_tab.select(update_url_language, [current_task_language, language_per_task], [current_task_language, language_per_task], trigger_mode="always_last").then(None, [current_task_language], [], js=set_window_url_params) - specific_metric = metric - if item.get("metric", None) is not None: - specific_metric = item['metric'] - with gr.Row(): gr.Markdown(f""" {item['description']} - - **Metric:** {specific_metric} + - **Metric:** {metric} - **Languages:** {item['language_long'] if 'language_long' in item else item['language']} - {"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''} - {"- **Description:** " + item['desc'] if ("desc" in item and item["desc"] is not None) else ''} + {"- **Credits:** " + item['credits'] if "credits" in item else ''} """) with gr.Row(): @@ -346,9 +2170,9 @@ with gr.Blocks(css=css) as block: full_dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", visible=False) full_dataframes.append(full_dataframe) - # with gr.Row(): - # refresh_button = gr.Button("Refresh") - # refresh_button.click(item["refresh"], inputs=None, outputs=dataframe, concurrency_limit=20) + with gr.Row(): + refresh_button = gr.Button("Refresh") + refresh_button.click(item["refresh"], inputs=None, outputs=dataframe) 
gr.Markdown(f""" - **Total Datasets**: {NUM_DATASETS} @@ -399,14 +2223,6 @@ with gr.Blocks(css=css) as block: block.queue(max_size=10) block.launch() -# Add model names here so the mteb/leaderboard space shows up on their model page -# from envs import MODEL_META -# print("','".join(MODEL_META["models_to_skip"])) -# print("','".join(list(MODEL_META['model_meta'].keys()))) -# print("','".join([x['link'].split("co/")[-1] for x in MODEL_META['model_meta'].values() if (x.get('link', None)) and ("huggingface.co" in x['link'])])) -# from envs import API; print("','".join([x.modelId for x in list(API.list_models(filter="mteb")) if x.modelId not in UNUSED])) -UNUSED = ['michaelfeil/ct2fast-e5-large-v2','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse','newsrx/instructor-xl','sionic-ai/sionic-ai-v1','lsf1000/bge-evaluation','Intel/bge-small-en-v1.5-sst2','newsrx/instructor-xl-newsrx','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse','davidpeer/gte-small','goldenrooster/multilingual-e5-large','kozistr/fused-large-en','mixamrepijey/instructor-small','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised','DecisionOptimizationSystem/DeepFeatEmbeddingLargeContext','Intel/bge-base-en-v1.5-sst2-int8-dynamic','morgendigital/multilingual-e5-large-quantized','BAAI/bge-small-en','ggrn/e5-small-v2','vectoriseai/gte-small','giulio98/placeholder','odunola/UAE-Large-VI','vectoriseai/e5-large-v2','gruber/e5-small-v2-ggml','Severian/nomic','arcdev/e5-mistral-7b-instruct','mlx-community/multilingual-e5-base-mlx','michaelfeil/ct2fast-bge-base-en-v1.5','Intel/bge-small-en-v1.5-sst2-int8-static','jncraton/stella-base-en-v2-ct2-int8','vectoriseai/multilingual-e5-large','rlsChapters/Chapters-SFR-Embedding-Mistral','arcdev/SFR-Embedding-Mistral','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised','vectoriseai/gte-base','mixamrepijey/instructor-models'
,'GovCompete/e5-large-v2','ef-zulla/e5-multi-sml-torch','khoa-klaytn/bge-small-en-v1.5-angle','krilecy/e5-mistral-7b-instruct','vectoriseai/bge-base-en-v1.5','vectoriseai/instructor-base','jingyeom/korean_embedding_model','rizki/bgr-tf','barisaydin/bge-base-en','jamesgpt1/zzz','Malmuk1/e5-large-v2_Sharded','vectoriseai/ember-v1','Consensus/instructor-base','barisaydin/bge-small-en','barisaydin/gte-base','woody72/multilingual-e5-base','Einas/einas_ashkar','michaelfeil/ct2fast-bge-large-en-v1.5','vectoriseai/bge-small-en-v1.5','iampanda/Test','cherubhao/yogamodel','ieasybooks/multilingual-e5-large-onnx','jncraton/e5-small-v2-ct2-int8','radames/e5-large','khoa-klaytn/bge-base-en-v1.5-angle','Intel/bge-base-en-v1.5-sst2-int8-static','vectoriseai/e5-large','TitanML/jina-v2-base-en-embed','Koat/gte-tiny','binqiangliu/EmbeddingModlebgelargeENv1.5','beademiguelperez/sentence-transformers-multilingual-e5-small','sionic-ai/sionic-ai-v2','jamesdborin/jina-v2-base-en-embed','maiyad/multilingual-e5-small','dmlls/all-mpnet-base-v2','odunola/e5-base-v2','vectoriseai/bge-large-en-v1.5','vectoriseai/bge-small-en','karrar-alwaili/UAE-Large-V1','t12e/instructor-base','Frazic/udever-bloom-3b-sentence','Geolumina/instructor-xl','hsikchi/dump','recipe/embeddings','michaelfeil/ct2fast-bge-small-en-v1.5','ildodeltaRule/multilingual-e5-large','shubham-bgi/UAE-Large','BAAI/bge-large-en','michaelfeil/ct2fast-e5-small-v2','cgldo/semanticClone','barisaydin/gte-small','aident-ai/bge-base-en-onnx','jamesgpt1/english-large-v1','michaelfeil/ct2fast-e5-small','baseplate/instructor-large-1','newsrx/instructor-large','Narsil/bge-base-en','michaelfeil/ct2fast-e5-large','mlx-community/multilingual-e5-small-mlx','lightbird-ai/nomic','MaziyarPanahi/GritLM-8x7B-GGUF','newsrx/instructor-large-newsrx','dhairya0907/thenlper-get-large','barisaydin/bge-large-en','jncraton/bge-small-en-ct2-int8','retrainai/instructor-xl','BAAI/bge-base-en','gentlebowl/instructor-large-safetensors','d0rj/e5-large-en-ru','atian-ch
apters/Chapters-SFR-Embedding-Mistral','Intel/bge-base-en-v1.5-sts-int8-static','Intel/bge-base-en-v1.5-sts-int8-dynamic','jncraton/GIST-small-Embedding-v0-ct2-int8','jncraton/gte-tiny-ct2-int8','d0rj/e5-small-en-ru','vectoriseai/e5-small-v2','SmartComponents/bge-micro-v2','michaelfeil/ct2fast-gte-base','vectoriseai/e5-base-v2','Intel/bge-base-en-v1.5-sst2','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised','Research2NLP/electrical_stella','weakit-v/bge-base-en-v1.5-onnx','GovCompete/instructor-xl','barisaydin/text2vec-base-multilingual','Intel/bge-small-en-v1.5-sst2-int8-dynamic','jncraton/gte-small-ct2-int8','d0rj/e5-base-en-ru','barisaydin/gte-large','fresha/e5-large-v2-endpoint','vectoriseai/instructor-large','Severian/embed','vectoriseai/e5-base','mlx-community/multilingual-e5-large-mlx','vectoriseai/gte-large','anttip/ct2fast-e5-small-v2-hfie','michaelfeil/ct2fast-gte-large','gizmo-ai/Cohere-embed-multilingual-v3.0','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse','Kenknight1999/tungdd7_ft_e5','joteqwork/new_gsev0','vantagediscovery/jina-embeddings-v2-base-en','vantagediscovery/nomic-embed-text-v1','vantagediscovery/nomic-embed-text-v1.5','srikanthmalla/hkunlp-instructor-xl','afrideva/GIST-all-MiniLM-L6-v2-GGUF','nadeem1362/mxbai-embed-large-v1-Q4_K_M-GGUF','agier9/gte-Qwen1.5-7B-instruct-Q5_K_M-GGUF','ekorman-strive/bge-large-en-v1.5','raghavlight/SE_v1','liddlefish/privacyembeddingv2_bge_small','ahmet1338/finetuned_embedder','radia/snowflake-arctic-embed-l-Q4_K_M-GGUF','GregorBiswanger/GritLM-7B-Q4_K_M-GGUF','powermove72/GritLM-7B-Q4_K_M-GGUF','sunzx0810/gte-Qwen2-7B-instruct-Q5_K_M-GGUF','nazimali/gte-Qwen2-7B-instruct-Q6_K-GGUF','nazimali/gte-Qwen2-7B-instruct-Q6_K-GGUF','fishbone64/gte-Qwen2-7B-instruct-Q8_0-GGUF','tobchef/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF','liddlefish/privacy_embedding_rag','liddlefish/privacy_embedding_rag_10k_tmp','liddlefish/privacy_embedding_bge_small_synthetic','mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF','leonn71/gte-Qwen2
-1.5B-instruct-Q6_K-GGUF', 'Baichuan-text-embedding','Cohere-embed-english-v3.0','Cohere-embed-multilingual-light-v3.0','Cohere-embed-multilingual-v3.0','DanskBERT','FollowIR-7B','GritLM-7B','LASER2','LLM2Vec-Llama-2-supervised','LLM2Vec-Llama-2-unsupervised','LLM2Vec-Meta-Llama-3-supervised','LLM2Vec-Meta-Llama-3-unsupervised','LLM2Vec-Mistral-supervised','LLM2Vec-Mistral-unsupervised','LLM2Vec-Sheared-Llama-supervised','LLM2Vec-Sheared-Llama-unsupervised','LaBSE','OpenSearch-text-hybrid','SFR-Embedding-Mistral','all-MiniLM-L12-v2','all-MiniLM-L6-v2','all-mpnet-base-v2','allenai-specter','bert-base-10lang-cased','bert-base-15lang-cased','bert-base-25lang-cased','bert-base-multilingual-cased','bert-base-multilingual-uncased','bert-base-swedish-cased','bert-base-uncased','bge-base-zh-v1.5','bge-large-en-v1.5','bge-large-zh-noinstruct','bge-large-zh-v1.5','bge-m3','bge-small-zh-v1.5','bm25','camembert-base','camembert-large','contriever-base-msmarco','cross-en-de-roberta-sentence-transformer','dfm-encoder-large-v1','dfm-sentence-encoder-large-1','distilbert-base-25lang-cased','distilbert-base-en-fr-cased','distilbert-base-en-fr-es-pt-it-cased','distilbert-base-fr-cased','distilbert-base-uncased','distiluse-base-multilingual-cased-v2','e5-base-4k','e5-base-v2','e5-base','e5-large-v2','e5-large','e5-mistral-7b-instruct','e5-small','electra-small-nordic','electra-small-swedish-cased-discriminator','elser-v2','flan-t5-base','flan-t5-large','flaubert_base_cased','flaubert_base_uncased','flaubert_large_cased','gbert-base','gbert-large','gelectra-base','gelectra-large','glove.6B.300d','google-gecko-256.text-embedding-004','google-gecko.text-embedding-004','gottbert-base','gte-Qwen1.5-7B-instruct','gte-Qwen2-7B-instruct','gtr-t5-base','gtr-t5-large','gtr-t5-xl','gtr-t5-xxl','herbert-base-retrieval-v2','instructor-base','instructor-large','instructor-xl','jina-embeddings-v2-base-en','komninos','llama-2-7b-chat','luotuo-bert-medium','m3e-base','m3e-large','mistral-7b-instruct-v
0.2','mistral-embed','monobert-large-msmarco','monot5-3b-msmarco-10k','monot5-base-msmarco-10k','msmarco-bert-co-condensor','multi-qa-MiniLM-L6-cos-v1','multilingual-e5-base','multilingual-e5-large','multilingual-e5-small','nb-bert-base','nb-bert-large','nomic-embed-text-v1','nomic-embed-text-v1.5-128','nomic-embed-text-v1.5-256','nomic-embed-text-v1.5-512','nomic-embed-text-v1.5-64','norbert3-base','norbert3-large','paraphrase-multilingual-MiniLM-L12-v2','paraphrase-multilingual-mpnet-base-v2','sentence-bert-swedish-cased','sentence-camembert-base','sentence-camembert-large','sentence-croissant-llm-base','sentence-t5-base','sentence-t5-large','sentence-t5-xl','sentence-t5-xxl','silver-retriever-base-v1','st-polish-paraphrase-from-distilroberta','st-polish-paraphrase-from-mpnet','sup-simcse-bert-base-uncased','text-embedding-3-large','text-embedding-3-large-256','text-embedding-3-small','text-embedding-ada-002','text-search-ada-001','text-search-ada-doc-001','text-search-ada-query-001','text-search-babbage-001','text-search-curie-001','text-search-davinci-001','text-similarity-ada-001','text-similarity-babbage-001','text-similarity-curie-001','text-similarity-davinci-001','tart-dual-contriever-msmarco','tart-full-flan-t5-xl','text2vec-base-chinese','text2vec-base-multilingual','text2vec-large-chinese','titan-embed-text-v1','udever-bloom-1b1','udever-bloom-560m','universal-sentence-encoder-multilingual-3','universal-sentence-encoder-multilingual-large-3','unsup-simcse-bert-base-uncased','use-cmlm-multilingual','voyage-2','voyage-code-2','voyage-large-2-instruct','voyage-law-2','voyage-lite-01-instruct','voyage-lite-02-instruct','voyage-multilingual-2','xlm-roberta-base','xlm-roberta-large','NV-Retriever-v1','NV-Embed-v1','Linq-Embed-Mistral','Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit','Muennighoff/SGPT-125M-weightedmean-msmarco-specb-bitfit','Muennighoff/SGPT-125M-weightedmean-nli-bitfit','Muennighoff/SGPT-2.7B-weightedmean-msmarco-specb-bitfit','Muenni
ghoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit','Muennighoff/SGPT-5.8B-weightedmean-nli-bitfit','DMetaSoul/sbert-chinese-general-v1','bigscience-data/sgpt-bloom-1b7-nli','bigscience/sgpt-bloom-7b1-msmarco','aari1995/German_Semantic_STS_V2','intfloat/e5-small','hkunlp/instructor-large','hkunlp/instructor-base','hkunlp/instructor-xl','intfloat/e5-base','intfloat/e5-large','Shimin/yiyouliao','vprelovac/universal-sentence-encoder-multilingual-large-3','vprelovac/universal-sentence-encoder-multilingual-3','vprelovac/universal-sentence-encoder-4','vprelovac/universal-sentence-encoder-large-5','ManiShankar-AlpesAi/paraphrase-multilingual-mpnet-base-v2-KE_Sieve','nickprock/mmarco-bert-base-italian-uncased','intfloat/e5-small-v2','intfloat/e5-base-v2','intfloat/e5-large-v2','intfloat/multilingual-e5-base','Shimin/LLaMA-embeeding','Forbu14/openai_clip_embeddings','shibing624/text2vec-base-multilingual','consciousAI/cai-lunaris-text-embeddings','consciousAI/cai-stellaris-text-embeddings','intfloat/multilingual-e5-small','intfloat/multilingual-e5-large','jinaai/jina-embedding-s-en-v1','jinaai/jina-embedding-b-en-v1','jinaai/jina-embedding-l-en-v1','deepfile/embedder-100p','lixsh6/XLM-3B5-embedding','lixsh6/XLM-0B6-embedding','thenlper/gte-base','thenlper/gte-large','thenlper/gte-small','lixsh6/MegatronBert-1B3-embedding','facebook/SONAR','Hum-Works/lodestone-base-4096-v1','sensenova/piccolo-base-zh','sensenova/piccolo-large-zh','infgrad/stella-base-zh','infgrad/stella-large-zh','BAAI/bge-reranker-base','BAAI/bge-base-en-v1.5','BAAI/bge-large-en-v1.5','BAAI/bge-small-en-v1.5','BAAI/bge-reranker-large','mgoin/all-MiniLM-L6-v2-ds','neuralmagic/bge-small-en-v1.5-sparse','jinaai/jina-embeddings-v2-base-en','jinaai/jina-embeddings-v2-small-en','neuralmagic/bge-small-en-v1.5-quant','nickprock/stsbm-sentence-flare-it','nickprock/mmarco-sentence-flare-it','neuralmagic/bge-base-en-v1.5-sparse','neuralmagic/bge-base-en-v1.5-quant','neuralmagic/bge-large-en-v1.5-sparse','neuralmagic/bge
-large-en-v1.5-quant','TaylorAI/gte-tiny','TaylorAI/bge-micro','llmrails/ember-v1','TaylorAI/bge-micro-v2','zeroshot/gte-small-quant','infgrad/stella-large-zh-v2','infgrad/stella-base-zh-v2','zeroshot/gte-large-quant','zeroshot/gte-large-sparse','EdwardBurgin/paraphrase-multilingual-mpnet-base-v2','Amu/tao','infgrad/stella-base-en-v2','djovak/multi-qa-MiniLM-L6-cos-v1','izhx/udever-bloom-560m','izhx/udever-bloom-1b1','izhx/udever-bloom-3b','izhx/udever-bloom-7b1','thtang/ALL_862873','andersonbcdefg/bge-small-4096','Cohere/Cohere-embed-multilingual-light-v3.0','Cohere/Cohere-embed-multilingual-v3.0','Cohere/Cohere-embed-english-light-v3.0','Cohere/Cohere-embed-english-v3.0','Amu/tao-8k','thenlper/gte-large-zh','thenlper/gte-base-zh','thenlper/gte-small-zh','jamesgpt1/sf_model_e5','OrlikB/st-polish-kartonberta-base-alpha-v1','TownsWu/PEG','sdadas/mmlw-e5-small','sdadas/mmlw-e5-base','sdadas/mmlw-e5-large','sdadas/mmlw-roberta-base','sdadas/mmlw-roberta-large','jinaai/jina-embeddings-v2-base-code','aws-neuron/bge-base-en-v1-5-seqlen-384-bs-1','Erin/mist-zh','ClayAtlas/winberta-base','Pristinenlp/alime-reranker-large-zh','WhereIsAI/UAE-Large-V1','OrdalieTech/Solon-embeddings-large-0.1','ClayAtlas/winberta-large','intfloat/e5-mistral-7b-instruct','liujiarik/lim_base_zh','RookieHX/bge_m3e_stella','akarum/cloudy-large-zh','zhou-xl/bi-cse','lier007/xiaobu-embedding','jinaai/jina-embeddings-v2-base-zh','jinaai/jina-embeddings-v2-base-de','nomic-ai/nomic-embed-text-v1-ablated','nomic-ai/nomic-embed-text-v1-unsupervised','mukaj/fin-mpnet-base','Pristinenlp/alime-embedding-large-zh','pascalhuerten/instructor-skillfit','jinaai/jina-embeddings-v2-base-es','Salesforce/SFR-Embedding-Mistral','DMetaSoul/Dmeta-embedding-zh','Xenova/jina-embeddings-v2-base-zh','Xenova/jina-embeddings-v2-base-de','avsolatorio/GIST-Embedding-v0','nomic-ai/nomic-embed-text-v1','avsolatorio/GIST-all-MiniLM-L6-v2','avsolatorio/GIST-small-Embedding-v0','biswa921/bge-m3','Jechto/e5-dansk-test-0.1','intfloat/
multilingual-e5-large-instruct','tanmaylaud/ret-phi2-v0','nomic-ai/nomic-embed-text-v1.5','GritLM/GritLM-7B','GritLM/GritLM-8x7B','avsolatorio/GIST-large-Embedding-v0','ClayAtlas/windberta-large','infgrad/stella-base-zh-v3-1792d','dunzhang/stella-large-zh-v3-1792d','jspringer/echo-mistral-7b-instruct-lasttoken','dunzhang/stella-mrl-large-zh-v3.5-1792d','sentosa/ZNV-Embedding','Nehc/e5-large-ru','neofung/m3e-ernie-xbase-zh','mixedbread-ai/mxbai-embed-2d-large-v1','mixedbread-ai/mxbai-embed-large-v1','aspire/acge_text_embedding','manu/sentence_croissant_alpha_v0.1','wongctroman/hktv-fine-tuned-cloudy-large-zh-metaphor14','manu/sentence_croissant_alpha_v0.2','mradermacher/GritLM-8x7B-GGUF','jhu-clsp/FollowIR-7B','DMetaSoul/Dmeta-embedding-zh-small','dwzhu/e5-base-4k','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp','ChristianAzinn/uae-large-v1-gguf','ChristianAzinn/gist-large-embedding-v0-gguf','ChristianAzinn/bge-base-en-v1.5-gguf','ChristianAzinn/bge-small-en-v1.5-gguf','ChristianAzinn/bge-large-en-v1.5-gguf','ChristianAzinn/gte-base-gguf','ChristianAzinn/gte-large-gguf','ChristianAzinn/gte-small-gguf','ChristianAzinn/mxbai-embed-large-v1-gguf','ChristianAzinn/gist-small-embedding-v0-gguf','ChristianAzinn/e5-base-v2-gguf','ChristianAzinn/e5-large-v2-gguf','ChristianAzinn/e5-small-v2-gguf','ChristianAzinn/labse-gguf','srikanthmalla/BAAI-bge-reranker-large','Snowflake/snowflake-arctic-embed-m','manu/bge-m3-custom-fr','Snowflake/snowflake-arctic-embed-m-long','Snowflake/snowflake-arctic-embed-s','Snowflake/snowflake-arctic-embed-xs','Snowflake/snowflake-arctic-embed-l','ChristianAzinn/snowflake-arctic-embed-l-gguf','ChristianAzinn/snowflake-arctic-embed-m-long-GGUF','ChristianAzinn/snowflake-arctic-embed-m-gguf','ChristianAzinn/snowflake-arctic-embed-s-gguf','ChristianAzinn/snowflake-arctic-embed-xs-gguf','dwzhu/e5rope-base','pengql/checkpoint-9000','Alibaba-NLP/gte-base-en-v1.5','Aliba
ba-NLP/gte-large-en-v1.5','Alibaba-NLP/gte-Qwen1.5-7B-instruct','sensenova/piccolo-large-zh-v2','Mihaiii/gte-micro','NLPArtisan/qwen-1.8b-retrieval-test','Mihaiii/gte-micro-v2','Mihaiii/gte-micro-v3','Mihaiii/gte-micro-v4','Mihaiii/Taximetristi-2023','manu/sentence_croissant_alpha_v0.3','Mihaiii/Bulbasaur','Mihaiii/Ivysaur','manu/sentence_croissant_alpha_v0.4','Mihaiii/Venusaur','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp','amazon/Titan-text-embeddings-v2','Mihaiii/Squirtle','Mihaiii/Wartortle','avsolatorio/NoInstruct-small-Embedding-v0','Mihaiii/test24','Mihaiii/test25','yessilver/new_model','fine-tuned/jina-embeddings-v2-base-en-03052024-r5ez-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-c20v-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-x8ew-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-73xx-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-21on-webapp','fine-tuned/jina-embeddings-v2-base-en-03052024-0swb-webapp','corto-ai/nomic-embed-text-v1','fine-tuned/jina-embeddings-v2-base-en-06052024-lmgf-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-6bdu-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-5pdj-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-yl1z-webapp','fine-tuned/jina-embeddings-v2-base-en-652024-vsmg-webapp','fine-tuned/jina-embeddings-v2-base-en-06052024-ruwi-webapp','fine-tuned/test','fine-tuned/jina-embeddings-v2-base-code-06052024-mhal-webapp','fine-tuned/jina-embeddings-v2-base-en-562024-j9xx-webapp','fine-tuned/jina-embeddings-v2-base-en-572024-xg53-webapp','fine-tuned/jina-embeddings-v2-base-en-202457-oc31-webapp','fine-tuned/scientific_papers_from_arxiv','fine-tuned/coding','fine-tuned/very_specific_technical_questions_about_Ubuntu','fine-tuned/CMedQAv2-reranking-improved','Labib11/MUG-B-1.6','shhy1995/AGE_Hybrid','fine-tuned/jina-embeddings-v2-base-en-10052024-lns6-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-scientific-papers-from-arxiv','fine-tuned/jinaai_jina-embedding
s-v2-base-code-askubuntu','fine-tuned/jinaai_jina-embeddings-v2-base-en-scidocs','fine-tuned/jinaai_jina-embeddings-v2-base-code-stackoverflow','fine-tuned/jina-embeddings-v2-base-en-5102024-kvgq-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-zh-CMedQAv2','fine-tuned/jina-embeddings-v2-base-code-11_05_2024-hbxc-webapp','fine-tuned/jina-embeddings-v2-base-en-5102024-h7o7-webapp','fine-tuned/CMedQAv2-3','michaelfeil/jina-embeddings-v2-base-code','fine-tuned/jina-embeddings-v2-base-en-2024512-wvj9-webapp','fine-tuned/jina-embeddings-v2-base-en-5122024-3toh-webapp','MoMonir/SFR-Embedding-Mistral-GGUF','technicolor/Angle_BERT','fine-tuned/jina-embeddings-v2-base-en-2024513-kkxa-webapp','fine-tuned/jina-embeddings-v2-base-en-13052024-35bv-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-code-jinaai_jina-embeddings-v2-base-cod','fine-tuned/jinaai_jina-embeddings-v2-base-en-jinaai_jina-embeddings-v2-base-en-sc','fine-tuned/jinaai_jina-embeddings-v2-base-zh-jinaai_jina-embeddings-v2-base-zh-CM','fine-tuned/jinaai_jina-embeddings-v2-base-zh-CMedQAv2-3','fine-tuned/scidocs','fine-tuned/askubuntu','fine-tuned/stackoverflow','fine-tuned/cmedqav2','fine-tuned/jina-embeddings-v2-base-en-13052024-ch9n-webapp','fine-tuned/askubuntu-c','fine-tuned/askubuntu-l','fine-tuned/scidocs-c','fine-tuned/stackoverflow-c','fine-tuned/cmedqav2-c','fine-tuned/norwegian-nli-triplets-c','AdrienB134/llm2vec-croissant-mntp','Erin/IYun-large-zh','fine-tuned/jina-embeddings-v2-base-en-14052024-5b5o-webapp','fine-tuned/jina-embeddings-v2-base-en-14052024-9xxb-webapp','fine-tuned/jina-embeddings-v2-base-en-14052024-afuz-webapp','fine-tuned/dutch-legal-c','AdrienB134/llm2vec-occiglot-mntp','fine-tuned/dutch-legal-c-64-24','w601sxs/b1ade-embed','fine-tuned/dutch-legal-c-1280-24','neofung/bge-reranker-large-1k','fine-tuned/askubuntu-c-128-24','fine-tuned/askubuntu-c-256-24','fine-tuned/stackoverflow-c-128-24','fine-tuned/cmedqav2-c-128-24','fine-tuned/scidocs-c-128-24','fine-tuned/dutch-legal-c-128-24
','fine-tuned/scidocs-c-256-24','fine-tuned/stackoverflow-c-256-24','qihoo360/360Zhinao-search','fine-tuned/stackoverflow-c-64-24','fine-tuned/askubuntu-c-64-24','fine-tuned/scidocs-c-64-24','fine-tuned/cmedqav2-c-64-24','fine-tuned/jina-embeddings-v2-base-en-15052024-stsl-webapp','fine-tuned/jina-embeddings-v2-base-en-5152024-tsbl-webapp','fine-tuned/jina-embeddings-v2-base-en-5162024-o9um-webapp','fine-tuned/test-run','fine-tuned/stackoverflow-c-64-24-gpt-4o-2024-05-13','MoMonir/gte-Qwen1.5-7B-instruct-GGUF','fine-tuned/scidocs-c-64-24-gpt-4o-2024-05-133652','fine-tuned/askubuntu-c-64-24-gpt-4o-2024-05-135760','fine-tuned/stackoverflow-c-64-24-gpt-4o-2024-05-137765','fine-tuned/scidocs-c-64-24-gpt-4o-2024-05-13-46337','fine-tuned/askubuntu-c-64-24-gpt-4o-2024-05-131171','fine-tuned/scidocs-c-64-24-gpt-4o-2024-05-135334','fine-tuned/askubuntu-c-64-24-gpt-4o-2024-05-13-61285','fine-tuned/cmedqav2-c-64-24-gpt-4o-2024-05-13-50353','fine-tuned/jina-embeddings-v2-base-en-1752024-13s3-webapp','fine-tuned/jina-embeddings-v2-base-en-1752024-zdtc-webapp','fine-tuned/jina-embeddings-v2-base-en-17052024-uhub-webapp','neofung/bge-reranker-base-1k','fine-tuned/jina-embeddings-v2-base-en-17052024-dumr-webapp','fine-tuned/arguana-c-64-24-gpt-4o-2024-05-136897','fine-tuned/arguana-c-64-24-gpt-4o-2024-05-136538','fine-tuned/arguana-c-128-24-gpt-4o-2024-05-13-68212','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-51550','fine-tuned/jina-embeddings-v2-base-en-19052024-oiu8-webapp','fine-tuned/jina-embeddings-v2-base-en-5192024-xqq9-webapp','fine-tuned/jina-embeddings-v2-base-en-5192024-qeye-webapp','fine-tuned/jina-embeddings-v2-base-en-5192024-seuc-webapp','qihoo360/360Zhinao-1.8B-Reranking','fine-tuned/jina-embeddings-v2-base-en-5202024-55bm-webapp','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-693632','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-819563','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-413214','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-129048','fine-tuned/argua
na-c-256-24-gpt-4o-2024-05-13-550302','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-799305','fine-tuned/jina-embeddings-v2-base-en-5202024-6tkj-webapp','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-264015','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-994439','fine-tuned/jina-embeddings-v2-base-en-5202024-rxyq-webapp','jinaai/jina-clip-v1','fine-tuned/jina-embeddings-v2-base-en-21052024-5qm5-webapp','dayyass/universal-sentence-encoder-multilingual-large-3-pytorch','fine-tuned/jina-embeddings-v2-base-en-21052024-5smg-webapp','fine-tuned/jina-embeddings-v2-base-en-22052024-vuno-webapp','fine-tuned/arguana-c-256-24-gpt-4o-2024-05-13-387094','fine-tuned/LegalBenchConsumerContractsQA-256-24-gpt-4o-2024-05-13-292605','fine-tuned/LegalBenchCorporateLobbying-256-24-gpt-4o-2024-05-13-296144','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-454852','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-79875','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-190413','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-727361','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-410031','fine-tuned/jina-embeddings-v2-base-code-5222024-i8af-webapp','fine-tuned/jina-embeddings-v2-base-en-5222024-hkde-webapp','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-14719','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-526066','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-10630','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-825318','nvidia/NV-Embed-v1','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-203779','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-497939','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-417900','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-994884','fine-tuned/jina-embeddings-v2-base-en-23052024-hbdj-webapp','fine-tuned/jina-embeddings-v2-base-en-23052024-6kfw-webapp','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-214114','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-587313','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-36954','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-814821','fine-tuned/FiQA2018-256-24-g
pt-4o-2024-05-13-256742','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-317735','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-378237','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-992459','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-552473','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-816730','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-875153','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-630221','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-214478','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-645586','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-786584','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-785172','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-687872','fine-tuned/BAAI_bge-small-en-v1_5-23052024-upq5-webapp','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-855191','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-978964','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-847943','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-449863','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-610535','fine-tuned/NFCorpus-8-8-gpt-4o-2024-05-13-322852','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-898550','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-546049','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-499715','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-598568','fine-tuned/BAAI_bge-large-en-v1_5-5242024-5uvy-webapp','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-304829','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-138515','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-269096','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-778232','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-111876','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-292803','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-96776','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-67198','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-310581','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-449834','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-737659','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-976783','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-27685','fin
e-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-54716','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-166315','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-812157','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-133486','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-423936','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-772252','w601sxs/b1ade-embed-kd','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-141246','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-478897','fine-tuned/ArguAna-256-24-gpt-4o-2024-05-13-952023','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-157892','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-780826','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-475598','fine-tuned/QuoraRetrieval-256-24-gpt-4o-2024-05-13-635320','fine-tuned/Touche2020-256-24-gpt-4o-2024-05-13-27907','fine-tuned/BAAI_bge-small-en-v1_5-5252024-jzfp-webapp','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-953989','fine-tuned/ArguAna-256-24-gpt-4o-2024-05-13-413991','fine-tuned/QuoraRetrieval-256-24-gpt-4o-2024-05-13-80208','fine-tuned/SciFact-256-24-gpt-4o-2024-05-13-484582','fine-tuned/FiQA2018-256-24-gpt-4o-2024-05-13-919917','fine-tuned/NFCorpus-256-24-gpt-4o-2024-05-13-988957','fine-tuned/SCIDOCS-256-24-gpt-4o-2024-05-13-597314','fine-tuned/TRECCOVID-256-24-gpt-4o-2024-05-13-896673','fine-tuned/ArguAna-256-24-gpt-4o-2024-05-13-689823','fine-tuned/BAAI_bge-small-en-v1_5-5272024-2fs4-webapp','fine-tuned/BAAI_bge-small-en-v1_5-27052024-4e8w-webapp','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-890333','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-140539','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-2499','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-733782','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-221689','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-465198','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-698531','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-548936','fine-tuned/BAAI_bge-small-en-v1_5-5272024-ou25-webapp','agier9/UAE-Large-V1-Q5_K_S-GGUF','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-1
3-69882','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-822545','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-268697','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-43315','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-866232','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-580978','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-115380','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-985263','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-439294','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-607244','fine-tuned/TRECCOVID-512-192-gpt-4o-2024-05-13-347397','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-650620','fine-tuned/QuoraRetrieval-512-192-gpt-4o-2024-05-13-777321','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-73934','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-14571','fine-tuned/TRECCOVID-512-192-gpt-4o-2024-05-13-653452','fine-tuned/QuoraRetrieval-512-192-gpt-4o-2024-05-13-768442','fine-tuned/BAAI_bge-small-en-v1_5-5282024-hkt5-webapp','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-100928','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-906438','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-266507','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-93805','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-424608','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-710799','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-357185','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-873132','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-452456','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-143735','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-625238','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-186741','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-935443','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-418918','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-110174','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-859511','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-437825','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-986812','fine-tuned/NFCorpus-512-192-gpt-4o
-2024-05-13-37395','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-591725','fine-tuned/BAAI_bge-small-en-v1_5-2852024-6p16-webapp','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-93651135','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-89953157','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-23636059','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-83930416','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-27692546','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-76823162','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-89836585','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-28032241','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-34914559','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-10552781','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-44219785','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-60453771','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-34917964','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-24541174','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-20151707','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-26543668','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-3292683','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-14028623','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-378068','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-27258064','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-79168271','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-80780135','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-42468142','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-47583376','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-80745457','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-34699555','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-35912','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-6089388','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-43473113','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-31581583','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-79659206','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-51211577','fine-tuned/Ar
guAna-512-192-gpt-4o-2024-05-13-53785794','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-37851926','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-93507731','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-24464680','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-1134151','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-87401391','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-76679499','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-58211433','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-56351634','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-87403910','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-67485775','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-8421720','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-50444055','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-67948597','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-63275487','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-90390391','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-16241583','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-86331274','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-53403987','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-3465370','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-19100452','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-83904142','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-37125303','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-94762694','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-20768519','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-35609715','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-14003539','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-16083606','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-3973638','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-76839538','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-90164285','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-52015789','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-93248154','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-74504128','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-65608189','fine
-tuned/SciFact-512-192-gpt-4o-2024-05-13-92012085','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-34898812','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-59792256','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-26737110','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-41821758','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-29425597','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-12907987','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-34642434','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-65268203','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-85722278','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-7975202','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-62563104','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-22039677','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-80948573','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-48400660','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-10086588','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-11626257','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-5953538','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-68485784','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-51991531','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-81928581','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-6825910','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-52686172','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-63983441','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-76979764','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-25305323','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-89774081','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-99342737','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-97839788','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-52238558','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-486134','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-46607440','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-80802988','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-67820659','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-3
7230491','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-64924747','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-17390035','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-66909812','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-67941497','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-95714065','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-65992666','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-89826544','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-74939490','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-16883408','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-40695234','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-68577224','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-47339454','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-36338558','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-17911388','fine-tuned/FiQA2018-512-192-gpt-4o-2024-05-13-97777963','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-51883844','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-24419258','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-34427772','Linq-AI-Research/Linq-Embed-Mistral','fine-tuned/ArguAna-512-192-gpt-4o-2024-05-13-14562627','fine-tuned/SCIDOCS-512-192-gpt-4o-2024-05-13-37833293','fine-tuned/before-finetuning-512-192-gpt-4o-2024-05-13-65274313','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-99421248','fine-tuned/NFCorpus-512-192-gpt-4o-2024-05-13-67596481','fine-tuned/SciFact-512-192-gpt-4o-2024-05-13-3038586','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-18360524','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-73143156','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-20584918','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-52831585','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-45622553','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-45587246','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-39088299','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-5483216','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-1
3-23538198','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-94858978','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-25926506','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-60385830','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-19472313','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-1216656','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-39265981','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-76083984','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-97946708','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-66633416','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-13220755','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-62034393','Classical/Yinka','fine-tuned/BAAI_bge-small-en-v1_5-30052024-rc2l-webapp','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-55034819','twadada/tst','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-2553188','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-28832324','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-50573159','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-38097330','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-66747460','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-48618256','fine-tuned/BAAI_bge-small-en-v1_5-612024-vf79-webapp','fine-tuned/BAAI_bge-small-en-v1_5-632024-34lw-webapp','corto-ai/bge-reranker-large-onnx','fine-tuned/BAAI_bge-small-en-v1_5-04062024-hsmq-webapp','iampanda/zpoint_large_embedding_zh','silverjam/jina-embeddings-v2-base-zh','fine-tuned/jinaai_jina-embeddings-v2-base-en-05062024-16gq-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-05062024-445b-webapp','neofung/LdIR-reranker-large','fine-tuned/jinaai_jina-embeddings-v2-base-en-05062024-zvoa-webapp','fine-tuned/BAAI_bge-small-en-v1_5-05062024-x987-webapp','fine-tuned/deepspeed-from-new-new-docker','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-86786922','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-59074949','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-55567015','fine-tuned/before-finet
uning-32000-384-gpt-4o-2024-05-13-67199932','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-24297328','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-81211802','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-7385160','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-74794049','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-42885533','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-27359624','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-35162543','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-33133286','fine-tuned/FiQA2018-32000-384-gpt-4o-2024-05-13-83115388','fine-tuned/SciFact-32000-384-gpt-4o-2024-05-13-41822019','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-66131574','fine-tuned/ArguAna-32000-384-gpt-4o-2024-05-13-68388407','fine-tuned/SCIDOCS-32000-384-gpt-4o-2024-05-13-71434542','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-6875032','fine-tuned/before-finetuning-32000-384-gpt-4o-2024-05-13-91940173','fine-tuned/NFCorpus-32000-384-gpt-4o-2024-05-13-70846146','fine-tuned/BAAI_bge-large-en-v1_5-672024-v51y-webapp','Gameselo/STS-multilingual-mpnet-base-v2','itod/UAE-Large-V1-Q8_0-GGUF','fine-tuned/jinaai_jina-embeddings-v2-base-en-08062024-z8ik-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-202469-tgjk-webapp','liddlefish/privacy_embedding_rag_10k_base_checkpoint_2','liddlefish/privacy_embedding_rag_10k_base_final','w601sxs/b1ade-embed-kd_3','fine-tuned/jinaai_jina-embeddings-v2-base-en-6112024-fmxr-webapp','liddlefish/privacy_embedding_rag_10k_base_15_final','liddlefish/privacy_embedding_rag_10k_base_12_final','fine-tuned/BAAI_bge-m3-6122024-ibs3-webapp','fine-tuned/BAAI_bge-m3-2024__6__12_-1217-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-es-6122024-fv1x-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6122024-bhm2-webapp','fine-tuned/BAAI_bge-large-en-v1_5-1362024-2wos-webapp','raghavlight/TDTE','fine-tuned/jinaai_jina-embeddings-v2-base-en-6132024-wvrg-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en
-6132024-bez1-webapp','fine-tuned/BAAI_bge-large-en-1362024-gcw6-webapp','fine-tuned/BAAI_bge-base-en-1362024-n19c-webapp','fine-tuned/BAAI_bge-m3-1362024-m82b-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6142024-huet-webapp','fine-tuned/BAAI_bge-m3-6142024-0ndt-webapp','fine-tuned/BAAI_bge-large-en-v1_5-14062024-fimj-webapp','Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka','CAiRE/UniVaR-lambda-80','CAiRE/UniVaR-lambda-20','CAiRE/UniVaR-lambda-5','CAiRE/UniVaR-lambda-1','fine-tuned/BAAI_bge-large-en-v1_5-14062024-xdwa-webapp','Salesforce/SFR-Embedding-2_R','fine-tuned/BAAI_bge-large-en-15062024-atex-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-2024615-ioyu-webapp','ILKT/2024-06-15_10-09-42','Alibaba-NLP/gte-Qwen2-7B-instruct','fine-tuned/BAAI_bge-large-en-v1_5-1562024-to89-webapp','Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet','fine-tuned/jinaai_jina-embeddings-v2-base-en-6162024-xxse-webapp','Omartificial-Intelligence-Space/Arabic-labse-Matryoshka','Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka','Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka','ILKT/2024-06-17_21-37-12','fine-tuned/BAAI_bge-small-en-v1_5-18062024-56t5-webapp','ILKT/2024-06-19_08-22-22','ILKT/2024-06-19_10-03-38','fine-tuned/jinaai_jina-embeddings-v2-base-en-6192024-56os-webapp','ILKT/2024-06-19_21-12-17','ILKT/2024-06-19_22-27-15','ILKT/2024-06-19_22-23-38','fine-tuned/jinaai_jina-embeddings-v2-base-en-20062024-djhb-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-20062024-t2n9-webapp','ILKT/2024-06-20_12-31-59','fine-tuned/BAAI_bge-large-en-2062024-u43q-webapp','ILKT/2024-06-20_12-31-55','tomaarsen/jina-clip-v1-st','tomaarsen/jina-clip-v1-st-remote','fine-tuned/jinaai_jina-embeddings-v2-base-en-6212024-p8j6-webapp','ILKT/2024-06-22_12-37-29_epoch_1','ILKT/2024-06-22_12-37-29_epoch_2','ILKT/2024-06-22_12-37-29_epoch_3','ILKT/2024-06-22_12-37-29_epoch_4','ILKT/2024-06-22_12-37-29_epoch_5','fine-tu
ned/jinaai_jina-embeddings-v2-base-es-22062024-taeu-webapp','ILKT/2024-06-22_12-37-29_epoch_6','ILKT/2024-06-22_12-37-29_epoch_7','ILKT/2024-06-22_12-37-29_epoch_8','ILKT/2024-06-22_12-37-29_epoch_9','ILKT/2024-06-22_12-37-29_epoch_10','ILKT/2024-06-22_12-37-29_epoch_11','ILKT/2024-06-22_12-37-29_epoch_12','fine-tuned/jinaai_jina-embeddings-v2-base-en-6232024-zldx-webapp','ILKT/2024-06-22_12-37-29_epoch_13','ILKT/2024-06-22_12-37-29_epoch_14','ILKT/2024-06-23_09-09-07_epoch_1','ILKT/2024-06-22_12-37-29_epoch_15','ILKT/2024-06-23_09-09-07_epoch_2','ILKT/2024-06-23_09-09-07_epoch_3','ILKT/2024-06-23_09-09-07_epoch_4','ILKT/2024-06-23_09-09-07_epoch_5','ILKT/2024-06-23_09-09-07_epoch_6','ILKT/2024-06-23_09-09-07_epoch_7','ILKT/2024-06-23_09-09-07_epoch_8','fine-tuned/BAAI_bge-m3-6232024-4vtf-webapp','ILKT/2024-06-23_09-09-07_epoch_9','ILKT/2024-06-24_00-11-56_epoch_1','ILKT/2024-06-23_09-09-07_epoch_10','ILKT/2024-06-24_00-11-56_epoch_2','ILKT/2024-06-23_09-09-07_epoch_11','ILKT/2024-06-24_00-11-56_epoch_3','ILKT/2024-06-24_00-11-56_epoch_4','ILKT/2024-06-23_09-09-07_epoch_12','ILKT/2024-06-24_00-11-56_epoch_5','ILKT/2024-06-23_09-09-07_epoch_13','ILKT/2024-06-24_00-11-56_epoch_6','ILKT/2024-06-24_00-11-56_epoch_7','Lajavaness/bilingual-embedding-large','fine-tuned/jinaai_jina-embeddings-v2-base-en-24_06_2024-lrip-webapp','ILKT/2024-06-24_22-31-18_epoch_1','ILKT/2024-06-24_22-31-28_epoch_1','ILKT/2024-06-24_22-31-18_epoch_2','ILKT/2024-06-24_22-31-28_epoch_2','ILKT/2024-06-24_22-31-18_epoch_3','ILKT/2024-06-24_22-31-28_epoch_3','ILKT/2024-06-24_22-31-18_epoch_4','ILKT/2024-06-24_22-31-28_epoch_4','ILKT/2024-06-24_22-31-18_epoch_5','ILKT/2024-06-24_22-31-28_epoch_5','ILKT/2024-06-24_22-31-18_epoch_6','ILKT/2024-06-24_22-31-28_epoch_6','ILKT/2024-06-24_22-31-18_epoch_7','ILKT/2024-06-24_22-31-28_epoch_7','ILKT/2024-06-24_22-31-18_epoch_8','ILKT/2024-06-24_22-31-28_epoch_8','ILKT/2024-06-24_22-31-18_epoch_9','ILKT/2024-06-24_22-31-28_epoch_9','ILKT/2024-06-24_22-31-18_epo
ch_10','ILKT/2024-06-24_22-31-28_epoch_10','ILKT/2024-06-24_22-31-18_epoch_11','ILKT/2024-06-24_22-31-28_epoch_11','ILKT/2024-06-24_22-31-18_epoch_12','ILKT/2024-06-24_22-31-28_epoch_12','ILKT/2024-06-24_22-31-18_epoch_13','ILKT/2024-06-24_22-31-28_epoch_13','ILKT/2024-06-24_22-31-18_epoch_14','ILKT/2024-06-24_22-31-28_epoch_14','ILKT/2024-06-24_22-31-18_epoch_15','ILKT/2024-06-24_22-31-28_epoch_15','ILKT/2024-06-24_22-31-18_epoch_16','ILKT/2024-06-24_22-31-28_epoch_16','ILKT/2024-06-24_22-31-18_epoch_17','ILKT/2024-06-24_22-31-28_epoch_17','ILKT/2024-06-24_22-31-18_epoch_18','ILKT/2024-06-24_22-31-28_epoch_18','ILKT/2024-06-24_22-31-18_epoch_19','ILKT/2024-06-24_22-31-28_epoch_19','ILKT/2024-06-24_22-31-18_epoch_20','ILKT/2024-06-24_22-31-28_epoch_20','ILKT/2024-06-24_22-31-18_epoch_21','ILKT/2024-06-24_22-31-28_epoch_21','ILKT/2024-06-24_22-31-18_epoch_22','ILKT/2024-06-24_22-31-28_epoch_22','ILKT/2024-06-24_22-31-18_epoch_23','ILKT/2024-06-24_22-31-28_epoch_23','ILKT/2024-06-24_22-31-18_epoch_24','ILKT/2024-06-24_22-31-28_epoch_24','ILKT/2024-06-24_22-31-18_epoch_25','ILKT/2024-06-24_22-31-28_epoch_25','ILKT/2024-06-24_22-31-18_epoch_26','ILKT/2024-06-24_22-31-28_epoch_26','ILKT/2024-06-24_22-31-18_epoch_27','ILKT/2024-06-24_22-31-28_epoch_27','ILKT/2024-06-24_22-31-18_epoch_28','ILKT/2024-06-24_22-31-28_epoch_28','ILKT/2024-06-24_22-31-18_epoch_29','ILKT/2024-06-24_22-31-28_epoch_29','ILKT/2024-06-24_22-31-18_epoch_30','Lenovo-Zhihui/Zhihui_LLM_Embedding','ILKT/2024-06-24_22-31-28_epoch_30','ILKT/2024-06-24_22-31-18_epoch_31','ILKT/2024-06-24_22-31-28_epoch_31','ILKT/2024-06-24_22-31-18_epoch_32','ILKT/2024-06-24_22-31-28_epoch_32','ILKT/2024-06-24_22-31-18_epoch_33','ILKT/2024-06-24_22-31-28_epoch_33','ILKT/2024-06-24_22-31-18_epoch_34','ILKT/2024-06-24_22-31-28_epoch_34','ILKT/2024-06-24_22-31-18_epoch_35','ILKT/2024-06-24_22-31-28_epoch_35','ILKT/2024-06-24_22-31-18_epoch_36','ILKT/2024-06-24_22-31-28_epoch_36','ILKT/2024-06-24_22-31-18_epoch_37','ILKT/2024-0
6-24_22-31-28_epoch_37','ILKT/2024-06-24_22-31-18_epoch_38','ILKT/2024-06-24_22-31-28_epoch_38','ILKT/2024-06-24_22-31-18_epoch_39','ILKT/2024-06-24_22-31-28_epoch_39','ILKT/2024-06-24_22-31-18_epoch_40','ILKT/2024-06-24_22-31-28_epoch_40','ILKT/2024-06-24_22-31-18_epoch_41','ILKT/2024-06-24_22-31-28_epoch_41','ILKT/2024-06-24_22-31-18_epoch_42','ILKT/2024-06-24_22-31-28_epoch_42','ILKT/2024-06-24_22-31-18_epoch_43','ILKT/2024-06-24_22-31-28_epoch_43','ILKT/2024-06-24_22-31-18_epoch_44','ILKT/2024-06-24_22-31-28_epoch_44','ILKT/2024-06-24_22-31-18_epoch_45','ILKT/2024-06-24_22-31-28_epoch_45','ILKT/2024-06-24_22-31-18_epoch_46','ILKT/2024-06-24_22-31-28_epoch_46','ILKT/2024-06-24_22-31-18_epoch_47','ILKT/2024-06-24_22-31-28_epoch_47','ILKT/2024-06-24_22-31-18_epoch_48','ILKT/2024-06-24_22-31-28_epoch_48','ILKT/2024-06-24_22-31-18_epoch_49','ILKT/2024-06-24_22-31-28_epoch_49','ILKT/2024-06-24_22-31-18_epoch_50','ILKT/2024-06-24_22-31-28_epoch_50','ILKT/2024-06-24_22-31-18_epoch_51','ILKT/2024-06-24_22-31-28_epoch_51','ILKT/2024-06-24_22-31-18_epoch_52','ILKT/2024-06-24_22-31-28_epoch_52','ILKT/2024-06-24_22-31-18_epoch_53','ILKT/2024-06-24_22-31-28_epoch_53','ILKT/2024-06-24_22-31-18_epoch_54','ILKT/2024-06-24_22-31-28_epoch_54','ILKT/2024-06-24_22-31-18_epoch_55','ILKT/2024-06-24_22-31-28_epoch_55','ILKT/2024-06-24_22-31-18_epoch_56','ILKT/2024-06-24_22-31-28_epoch_56','ILKT/2024-06-24_22-31-18_epoch_57','ILKT/2024-06-24_22-31-28_epoch_57','ILKT/2024-06-24_22-31-18_epoch_58','ILKT/2024-06-24_22-31-28_epoch_58','ILKT/2024-06-24_22-31-18_epoch_59','ILKT/2024-06-24_22-31-28_epoch_59','ILKT/2024-06-24_22-31-18_epoch_60','ILKT/2024-06-24_22-31-28_epoch_60','ILKT/2024-06-24_22-31-18_epoch_61','ILKT/2024-06-24_22-31-28_epoch_61','ILKT/2024-06-24_22-31-18_epoch_62','ILKT/2024-06-24_22-31-28_epoch_62','ILKT/2024-06-24_22-31-18_epoch_63','ILKT/2024-06-24_22-31-28_epoch_63','ILKT/2024-06-24_22-31-18_epoch_64','ILKT/2024-06-24_22-31-28_epoch_64','ILKT/2024-06-24_22-31-18_epoch_
65','ILKT/2024-06-24_22-31-28_epoch_65','ILKT/2024-06-24_22-31-18_epoch_66','ILKT/2024-06-24_22-31-28_epoch_66','ILKT/2024-06-24_22-31-18_epoch_67','Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet','ILKT/2024-06-24_22-31-28_epoch_67','ILKT/2024-06-24_22-31-18_epoch_68','ILKT/2024-06-24_22-31-28_epoch_68','ILKT/2024-06-24_22-31-18_epoch_69','ILKT/2024-06-24_22-31-28_epoch_69','ILKT/2024-06-24_22-31-18_epoch_70','ILKT/2024-06-24_22-31-28_epoch_70','ILKT/2024-06-24_22-31-18_epoch_71','ILKT/2024-06-24_22-31-28_epoch_71','ILKT/2024-06-24_22-31-18_epoch_72','ILKT/2024-06-24_22-31-28_epoch_72','ILKT/2024-06-24_22-31-18_epoch_73','ILKT/2024-06-24_22-31-28_epoch_73','ILKT/2024-06-24_22-31-18_epoch_74','ILKT/2024-06-24_22-31-28_epoch_74','ILKT/2024-06-24_22-31-18_epoch_75','Intel/neural-embedding-v1','ILKT/2024-06-24_22-31-28_epoch_75','fine-tuned/BAAI_bge-m3-26062024-gdon-webapp','Lajavaness/bilingual-embedding-base','fine-tuned/jinaai_jina-embeddings-v2-base-es-6262024-yjwm-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6262024-wtkc-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-6272024-qn9b-webapp','BeastyZ/e5-R-mistral-7b','ILKT/2024-06-23_09-09-07_epoch_14','ILKT/2024-06-23_09-09-07_epoch_15','ILKT/2024-06-23_09-09-07_epoch_16','ILKT/2024-06-23_09-09-07_epoch_17','ILKT/2024-06-23_09-09-07_epoch_18','ILKT/2024-06-23_09-09-07_epoch_19','ILKT/2024-06-23_09-09-07_epoch_20','ILKT/2024-06-23_09-09-07_epoch_21','ILKT/2024-06-23_09-09-07_epoch_22','ILKT/2024-06-23_09-09-07_epoch_23','ILKT/2024-06-23_09-09-07_epoch_24','ILKT/2024-06-23_09-09-07_epoch_25','ILKT/2024-06-23_09-09-07_epoch_26','ILKT/2024-06-23_09-09-07_epoch_27','ILKT/2024-06-23_09-09-07_epoch_28','ILKT/2024-06-23_09-09-07_epoch_29','ILKT/2024-06-23_09-09-07_epoch_30','ILKT/2024-06-23_09-09-07_epoch_31','ILKT/2024-06-23_09-09-07_epoch_32','ILKT/2024-06-23_09-09-07_epoch_33','ILKT/2024-06-23_09-09-07_epoch_34','ILKT/2024-06-23_09-09-07_epoch_35','ILKT/2024-06-23_09-09-07_epoch_36','I
LKT/2024-06-23_09-09-07_epoch_37','ILKT/2024-06-23_09-09-07_epoch_38','ILKT/2024-06-23_09-09-07_epoch_39','ILKT/2024-06-23_09-09-07_epoch_40','ILKT/2024-06-23_09-09-07_epoch_41','ILKT/2024-06-23_09-09-07_epoch_42','ILKT/2024-06-23_09-09-07_epoch_43','ILKT/2024-06-23_09-09-07_epoch_44','ILKT/2024-06-23_09-09-07_epoch_45','ILKT/2024-06-23_09-09-07_epoch_46','ILKT/2024-06-23_09-09-07_epoch_47','ILKT/2024-06-23_09-09-07_epoch_48','ILKT/2024-06-23_09-09-07_epoch_49','ILKT/2024-06-23_09-09-07_epoch_50','ILKT/2024-06-23_09-09-07_epoch_51','ILKT/2024-06-23_09-09-07_epoch_52','ILKT/2024-06-23_09-09-07_epoch_53','ILKT/2024-06-23_09-09-07_epoch_54','ILKT/2024-06-23_09-09-07_epoch_55','ILKT/2024-06-23_09-09-07_epoch_56','ILKT/2024-06-23_09-09-07_epoch_57','ILKT/2024-06-23_09-09-07_epoch_58','ILKT/2024-06-23_09-09-07_epoch_59','ILKT/2024-06-23_09-09-07_epoch_60','ILKT/2024-06-23_09-09-07_epoch_61','ILKT/2024-06-23_09-09-07_epoch_62','ILKT/2024-06-23_09-09-07_epoch_63','ILKT/2024-06-23_09-09-07_epoch_64','ILKT/2024-06-23_09-09-07_epoch_65','ILKT/2024-06-23_09-09-07_epoch_66','ILKT/2024-06-23_09-09-07_epoch_67','ILKT/2024-06-23_09-09-07_epoch_68','ILKT/2024-06-23_09-09-07_epoch_69','ILKT/2024-06-23_09-09-07_epoch_70','ILKT/2024-06-23_09-09-07_epoch_71','ILKT/2024-06-23_09-09-07_epoch_72','ILKT/2024-06-23_09-09-07_epoch_73','ILKT/2024-06-23_09-09-07_epoch_74','ILKT/2024-06-23_09-09-07_epoch_75','Pekarnick/e5-large-v2-Q4_K_M-GGUF','ILKT/2024-06-24_00-11-56_epoch_8','ILKT/2024-06-24_00-11-56_epoch_9','ILKT/2024-06-24_00-11-56_epoch_10','ILKT/2024-06-24_00-11-56_epoch_11','ILKT/2024-06-24_00-11-56_epoch_12','ILKT/2024-06-24_00-11-56_epoch_13','ILKT/2024-06-24_00-11-56_epoch_14','ILKT/2024-06-24_00-11-56_epoch_15','ILKT/2024-06-24_00-11-56_epoch_16','ILKT/2024-06-24_00-11-56_epoch_17','ILKT/2024-06-24_00-11-56_epoch_18','ILKT/2024-06-24_00-11-56_epoch_19','ILKT/2024-06-24_00-11-56_epoch_20','ILKT/2024-06-24_00-11-56_epoch_21','ILKT/2024-06-24_00-11-56_epoch_22','ILKT/2024-06-24_00-11-5
6_epoch_23','ILKT/2024-06-24_00-11-56_epoch_24','ILKT/2024-06-24_00-11-56_epoch_25','ILKT/2024-06-24_00-11-56_epoch_26','ILKT/2024-06-24_00-11-56_epoch_27','ILKT/2024-06-24_00-11-56_epoch_28','ILKT/2024-06-24_00-11-56_epoch_29','ILKT/2024-06-24_00-11-56_epoch_30','ILKT/2024-06-24_00-11-56_epoch_31','ILKT/2024-06-24_00-11-56_epoch_32','ILKT/2024-06-24_00-11-56_epoch_33','ILKT/2024-06-24_00-11-56_epoch_34','ILKT/2024-06-24_00-11-56_epoch_35','ILKT/2024-06-24_00-11-56_epoch_36','ILKT/2024-06-24_00-11-56_epoch_37','ILKT/2024-06-24_00-11-56_epoch_38','ILKT/2024-06-24_00-11-56_epoch_39','ILKT/2024-06-24_00-11-56_epoch_40','ILKT/2024-06-24_00-11-56_epoch_41','ILKT/2024-06-24_00-11-56_epoch_42','ILKT/2024-06-24_00-11-56_epoch_43','ILKT/2024-06-24_00-11-56_epoch_44','ILKT/2024-06-24_00-11-56_epoch_45','ILKT/2024-06-24_00-11-56_epoch_46','ILKT/2024-06-24_00-11-56_epoch_47','ILKT/2024-06-24_00-11-56_epoch_48','ILKT/2024-06-24_00-11-56_epoch_49','ILKT/2024-06-24_00-11-56_epoch_50','ILKT/2024-06-24_00-11-56_epoch_51','ILKT/2024-06-24_00-11-56_epoch_52','ILKT/2024-06-24_00-11-56_epoch_53','ILKT/2024-06-24_00-11-56_epoch_54','ILKT/2024-06-24_00-11-56_epoch_55','ILKT/2024-06-24_00-11-56_epoch_56','ILKT/2024-06-24_00-11-56_epoch_57','ILKT/2024-06-24_00-11-56_epoch_58','ILKT/2024-06-24_00-11-56_epoch_59','ILKT/2024-06-24_00-11-56_epoch_60','ILKT/2024-06-24_00-11-56_epoch_61','ILKT/2024-06-24_00-11-56_epoch_62','ILKT/2024-06-24_00-11-56_epoch_63','ILKT/2024-06-24_00-11-56_epoch_64','ILKT/2024-06-24_00-11-56_epoch_65','ILKT/2024-06-24_00-11-56_epoch_66','ILKT/2024-06-24_00-11-56_epoch_67','ILKT/2024-06-24_00-11-56_epoch_68','ILKT/2024-06-24_00-11-56_epoch_69','ILKT/2024-06-24_00-11-56_epoch_70','ILKT/2024-06-24_00-11-56_epoch_71','ILKT/2024-06-24_00-11-56_epoch_72','ILKT/2024-06-24_00-11-56_epoch_73','ILKT/2024-06-24_00-11-56_epoch_74','ILKT/2024-06-24_00-11-56_epoch_75','ILKT/2024-06-22_12-37-29_epoch_16','ILKT/2024-06-22_12-37-29_epoch_17','ILKT/2024-06-22_12-37-29_epoch_18','ILKT/20
24-06-22_12-37-29_epoch_19','ILKT/2024-06-22_12-37-29_epoch_20','ILKT/2024-06-22_12-37-29_epoch_21','ILKT/2024-06-22_12-37-29_epoch_22','ILKT/2024-06-22_12-37-29_epoch_23','ILKT/2024-06-22_12-37-29_epoch_24','ILKT/2024-06-22_12-37-29_epoch_25','ILKT/2024-06-22_12-37-29_epoch_26','ILKT/2024-06-22_12-37-29_epoch_27','ILKT/2024-06-22_12-37-29_epoch_28','ILKT/2024-06-22_12-37-29_epoch_29','ILKT/2024-06-22_12-37-29_epoch_30','ILKT/2024-06-22_12-37-29_epoch_31','ILKT/2024-06-22_12-37-29_epoch_32','ILKT/2024-06-22_12-37-29_epoch_33','ILKT/2024-06-22_12-37-29_epoch_34','ILKT/2024-06-22_12-37-29_epoch_35','ILKT/2024-06-22_12-37-29_epoch_36','ILKT/2024-06-22_12-37-29_epoch_37','ILKT/2024-06-22_12-37-29_epoch_38','ILKT/2024-06-22_12-37-29_epoch_39','ILKT/2024-06-22_12-37-29_epoch_40','ILKT/2024-06-22_12-37-29_epoch_41','ILKT/2024-06-22_12-37-29_epoch_42','ILKT/2024-06-22_12-37-29_epoch_43','ILKT/2024-06-22_12-37-29_epoch_44','ILKT/2024-06-22_12-37-29_epoch_45','ILKT/2024-06-22_12-37-29_epoch_46','ILKT/2024-06-22_12-37-29_epoch_47','ILKT/2024-06-22_12-37-29_epoch_48','ILKT/2024-06-22_12-37-29_epoch_49','ILKT/2024-06-22_12-37-29_epoch_50','ILKT/2024-06-22_12-37-29_epoch_51','ILKT/2024-06-22_12-37-29_epoch_52','ILKT/2024-06-22_12-37-29_epoch_53','ILKT/2024-06-22_12-37-29_epoch_54','ILKT/2024-06-22_12-37-29_epoch_55','ILKT/2024-06-22_12-37-29_epoch_56','ILKT/2024-06-22_12-37-29_epoch_57','ILKT/2024-06-22_12-37-29_epoch_58','ILKT/2024-06-22_12-37-29_epoch_59','ILKT/2024-06-22_12-37-29_epoch_60','ILKT/2024-06-22_12-37-29_epoch_61','ILKT/2024-06-22_12-37-29_epoch_62','ILKT/2024-06-22_12-37-29_epoch_63','ILKT/2024-06-22_12-37-29_epoch_64','ILKT/2024-06-22_12-37-29_epoch_65','ILKT/2024-06-22_12-37-29_epoch_66','ILKT/2024-06-22_12-37-29_epoch_67','ILKT/2024-06-22_12-37-29_epoch_68','ILKT/2024-06-22_12-37-29_epoch_69','ILKT/2024-06-22_12-37-29_epoch_70','ILKT/2024-06-22_12-37-29_epoch_71','ILKT/2024-06-22_12-37-29_epoch_72','ILKT/2024-06-22_12-37-29_epoch_73','ILKT/2024-06-22_12-37-29_ep
och_74','ILKT/2024-06-22_12-37-29_epoch_75','Lajavaness/bilingual-embedding-large-8k','Alibaba-NLP/gte-Qwen2-1.5B-instruct','Jaume/gemma-2b-embeddings','lier007/xiaobu-embedding-v2','chihlunLee/NoInstruct-small-Embedding-v0-Q4_0-GGUF','fine-tuned/jinaai_jina-embeddings-v2-base-es-472024-aqk1-webapp','second-state/gte-Qwen2-1.5B-instruct-GGUF','gaianet/gte-Qwen2-1.5B-instruct-GGUF','yco/bilingual-embedding-base','fine-tuned/jinaai_jina-embeddings-v2-base-en-05072024-aj6g-webapp','AbderrahmanSkiredj1/arabic_text_embedding_sts_arabertv02_arabicnlitriplet','AbderrahmanSkiredj1/Arabic_text_embedding_for_sts','dimcha/mxbai-embed-large-v1-Q4_K_M-GGUF','fine-tuned/BAAI_bge-m3-782024-wl54-webapp','nvidia/NV-Retriever-v1','fine-tuned/jinaai_jina-embeddings-v2-base-en-792024-tyen-webapp','fine-tuned/jinaai_jina-embeddings-v2-base-en-11072024-bh6v-webapp','archit28/bge-large-en-v1.5-Q4_K_S-GGUF','dunzhang/stella_en_1.5B_v5','dunzhang/stella_en_400M_v5','niancheng/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF','niancheng/gte-Qwen2-7B-instruct-Q4_K_M-GGUF','fine-tuned/jinaai_jina-embeddings-v2-base-en-15072024-5xy1-webapp','fine-tuned/BAAI_bge-small-en-v1_5-7152024-w1z0-webapp', 
'Cohere/Cohere-embed-english-v3.0','Cohere/Cohere-embed-english-v3.0','Cohere/Cohere-embed-multilingual-light-v3.0','Cohere/Cohere-embed-multilingual-v3.0','vesteinn/DanskBERT','jhu-clsp/FollowIR-7B','GritLM/GritLM-7B','GritLM/GritLM-7B','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised','McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised','McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised','McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised','McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse','sentence-transformers/LaBSE','Linq-AI-Research/Linq-Embed-Mistral','nvidia/NV-Embed-v1','nvidia/NV-Retriever-v1','Salesforce/SFR-Embedding-Mistral','sentence-transformers/all-MiniLM-L12-v2','sentence-transformers/all-MiniLM-L12-v2','sentence-transformers/all-MiniLM-L6-v2','sentence-transformers/all-MiniLM-L6-v2','sentence-transformers/all-mpnet-base-v2','sentence-transformers/all-mpnet-base-v2','sentence-transformers/allenai-specter','Geotrend/bert-base-10lang-cased','Geotrend/bert-base-15lang-cased','Geotrend/bert-base-25lang-cased','google-bert/bert-base-multilingual-cased','google-bert/bert-base-multilingual-uncased','KB/bert-base-swedish-cased','bert-base-uncased','BAAI/bge-base-en-v1.5','BAAI/bge-base-en-v1.5','BAAI/bge-base-zh-v1.5','BAAI/bge-large-en-v1.5','BAAI/bge-large-en-v1.5','BAAI/bge-large-zh-noinstruct','BAAI/bge-large-zh-v1.5','BAAI/bge-m3','BAAI/bge-m3','BAAI/bge-small-en-v1.5','BAAI/bge-small-en-v1.5','BAAI/bge-small-zh-v1.5','almanach/camembert-base','almanach/camembert-large','nthakur/contriever-base-msmarco','facebook/contriever','facebook/contriever','T-Systems-onsite/cross-en-de-roberta-sentence-transformer','chcaa/dfm-encoder-large-v1','chcaa/dfm-encoder-large-v1','Geotrend/distilbert-base-25lang-cased','Geotrend/distilbert-base-en-fr-cased','Geotrend
/distilbert-base-en-fr-es-pt-it-cased','Geotrend/distilbert-base-fr-cased','distilbert-base-uncased','sentence-transformers/distiluse-base-multilingual-cased-v2','dwzhu/e5-base-4k','intfloat/e5-base-v2','intfloat/e5-base','intfloat/e5-large-v2','intfloat/e5-large','intfloat/e5-mistral-7b-instruct','intfloat/e5-mistral-7b-instruct-noinstruct','intfloat/e5-small','jonfd/electra-small-nordic','KBLab/electra-small-swedish-cased-discriminator','google/flan-t5-base','google/flan-t5-large','flaubert/flaubert_base_cased','flaubert/flaubert_base_uncased','flaubert/flaubert_large_cased','deepset/gbert-base','deepset/gbert-large','deepset/gelectra-base','deepset/gelectra-large','sentence-transformers/average_word_embeddings_glove.6B.300d','uklfr/gottbert-base','Alibaba-NLP/gte-Qwen1.5-7B-instruct','Alibaba-NLP/gte-Qwen2-7B-instruct','sentence-transformers/gtr-t5-base','sentence-transformers/gtr-t5-large','sentence-transformers/gtr-t5-xl','sentence-transformers/gtr-t5-xxl','ipipan/herbert-base-retrieval-v2','hkunlp/instructor-base','hkunlp/instructor-large','hkunlp/instructor-xl','jinaai/jina-embeddings-v2-base-en','sentence-transformers/average_word_embeddings_komninos','meta-llama/Llama-2-7b-chat-hf','silk-road/luotuo-bert-medium','moka-ai/m3e-base','moka-ai/m3e-large','mistralai/Mistral-7B-Instruct-v0.2','castorini/monobert-large-msmarco','castorini/monot5-3b-msmarco-10k','castorini/monot5-base-msmarco-10k','sentence-transformers/msmarco-bert-co-condensor','sentence-transformers/multi-qa-MiniLM-L6-cos-v1','intfloat/multilingual-e5-base','intfloat/multilingual-e5-large','intfloat/multilingual-e5-small','NbAiLab/nb-bert-base','NbAiLab/nb-bert-large','nomic-ai/nomic-embed-text-v1','nomic-ai/nomic-embed-text-v1.5','nomic-ai/nomic-embed-text-v1.5','nomic-ai/nomic-embed-text-v1.5','nomic-ai/nomic-embed-text-v1.5','ltg/norbert3-base','ltg/norbert3-large','sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2','sentence-transformers/paraphrase-multilingual-mpnet-base-v2','KBLa
b/sentence-bert-swedish-cased','dangvantuan/sentence-camembert-base','dangvantuan/sentence-camembert-large','Wissam42/sentence-croissant-llm-base','sentence-transformers/sentence-t5-base','sentence-transformers/sentence-t5-large','sentence-transformers/sentence-t5-xl','sentence-transformers/sentence-t5-xxl','ipipan/silver-retriever-base-v1','sdadas/st-polish-paraphrase-from-distilroberta','sdadas/st-polish-paraphrase-from-mpnet','princeton-nlp/sup-simcse-bert-base-uncased','orionweller/tart-dual-contriever-msmarco','facebook/tart-full-flan-t5-xl','shibing624/text2vec-base-chinese','GanymedeNil/text2vec-large-chinese','izhx/udever-bloom-1b1','izhx/udever-bloom-560m','vprelovac/universal-sentence-encoder-multilingual-3','vprelovac/universal-sentence-encoder-multilingual-large-3','princeton-nlp/unsup-simcse-bert-base-uncased','sentence-transformers/use-cmlm-multilingual','xlm-roberta-base','xlm-roberta-large'] - # Possible changes: # Could add graphs / other visual content # Could add verification marks