# Task-name lists and per-task instruction strings for embedding evaluation.
from typing import Dict

# ---- Task-name lists --------------------------------------------------------
# Task names grouped under the "long time" label.
# NOTE(review): presumably the tasks that take longest to evaluate — confirm.
LONG_TIME_TASK_NAMES = [
    "MSMARCO",
    "FEVER",
    "HotpotQA",
    "ClimateFEVER",
    "DBPedia",
    "NQ",
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "RedditClusteringP2P",
    "RedditClustering",
    "QuoraRetrieval",
    "StackExchangeClustering",
    "Touche2020",
    "MindSmallReranking",
    "AmazonPolarityClassification",
    "BiorxivClusteringP2P",
    "StackExchangeClusteringP2P",
    "TRECCOVID",
]

# Task names grouped under the "short time" label.
# NOTE(review): presumably the quickest tasks to evaluate — confirm.
SHORT_TIME_TASK_NAMES = [
    "BIOSSES",
    "STS17",
    "STS16",
    "AskUbuntuDupQuestions",
    "SummEval",
    "SciFact",
    "TweetSentimentExtractionClassification",
    "EmotionClassification",
    "SprintDuplicateQuestions",
]

# Task names grouped under the "mid time" label. This list overlaps with both
# SHORT_TIME_TASK_NAMES and LONG_TIME_TASK_NAMES (e.g. 'BIOSSES', 'TRECCOVID').
MID_TIME_TASK_NAMES = [
    'BIOSSES', 'STS17', 'STS22', 'STS16', 'STSBenchmark', 'STS13', 'STS15', 'STS12', 'STS14',
    'AskUbuntuDupQuestions', 'TwitterSemEval2015', 'SummEval', 'SICK-R', 'NFCorpus', 'SciFact',
    'CQADupstackWebmastersRetrieval', 'TwitterURLCorpus', 'SprintDuplicateQuestions',
    'CQADupstackAndroidRetrieval', 'CQADupstackMathematicaRetrieval', 'ArguAna',
    'CQADupstackProgrammersRetrieval', 'SCIDOCS', 'StackOverflowDupQuestions',
    'EmotionClassification', 'TweetSentimentExtractionClassification', 'CQADupstackStatsRetrieval',
    'CQADupstackGisRetrieval', 'CQADupstackWordpressRetrieval', 'CQADupstackEnglishRetrieval',
    'CQADupstackPhysicsRetrieval', 'CQADupstackGamingRetrieval', 'SciDocsRR', 'FiQA2018',
    'CQADupstackUnixRetrieval', 'ToxicConversationsClassification', 'Banking77Classification',
    'TwentyNewsgroupsClustering', 'MedrxivClusteringS2S', 'ImdbClassification',
    'MTOPDomainClassification', 'BiorxivClusteringS2S', 'AmazonCounterfactualClassification',
    'MassiveScenarioClassification', 'MedrxivClusteringP2P', 'MTOPIntentClassification',
    'MassiveIntentClassification', 'CQADupstackTexRetrieval', 'AmazonReviewsClassification',
    'TRECCOVID', 'BiorxivClusteringP2P', 'StackExchangeClusteringP2P', 'StackExchangeClustering',
]

# Task names making up the C-MTEB list. Each name appears exactly once
# ('MultilingualSentiment' used to be listed twice).
# NOTE(review): 'MmarcoReranking' is spelled with a lowercase "m" here, whereas
# get_task_def_by_task_name_and_type has an entry spelled 'MMarcoReranking' —
# confirm which spelling the task registry uses.
CMTEB_TASK_LIST = [
    'TNews', 'IFlyTek', 'MultilingualSentiment', 'JDReview', 'OnlineShopping', 'Waimai',
    'AmazonReviewsClassification', 'MassiveIntentClassification', 'MassiveScenarioClassification',
    'CLSClusteringS2S', 'CLSClusteringP2P', 'ThuNewsClusteringS2S', 'ThuNewsClusteringP2P',
    'Ocnli', 'Cmnli',
    'T2Reranking', 'MmarcoReranking', 'CMedQAv1', 'CMedQAv2',
    'T2Retrieval', 'MMarcoRetrieval', 'DuRetrieval', 'CovidRetrieval', 'CmedqaRetrieval',
    'EcomRetrieval', 'MedicalRetrieval', 'VideoRetrieval',
    'ATEC', 'BQ', 'LCQMC', 'PAWSX', 'STSB', 'AFQMC', 'QBQTC', 'STS22',
]

# Task names that get_task_type_en reports as "Classification".
TASK_LIST_CLASSIFICATION = [
    "AmazonCounterfactualClassification",
    "AmazonPolarityClassification",
    "AmazonReviewsClassification",
    "Banking77Classification",
    "EmotionClassification",
    "ImdbClassification",
    "MassiveIntentClassification",
    "MassiveScenarioClassification",
    "MTOPDomainClassification",
    "MTOPIntentClassification",
    "ToxicConversationsClassification",
    "TweetSentimentExtractionClassification",
]

# Task names that get_task_type_en reports as "Clustering".
TASK_LIST_CLUSTERING = [
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "BiorxivClusteringP2P",
    "BiorxivClusteringS2S",
    "MedrxivClusteringP2P",
    "MedrxivClusteringS2S",
    "RedditClustering",
    "RedditClusteringP2P",
    "StackExchangeClustering",
    "StackExchangeClusteringP2P",
    "TwentyNewsgroupsClustering",
]

# Task names that get_task_type_en reports as "PairClassification".
TASK_LIST_PAIR_CLASSIFICATION = [
    "SprintDuplicateQuestions",
    "TwitterSemEval2015",
    "TwitterURLCorpus",
]

# Task names that get_task_type_en reports as "Reranking".
TASK_LIST_RERANKING = [
    "AskUbuntuDupQuestions",
    "MindSmallReranking",
    "SciDocsRR",
    "StackOverflowDupQuestions",
]

# Task names that get_task_type_en reports as "Retrieval".
TASK_LIST_RETRIEVAL = [
    "ArguAna",
    "CQADupstackAndroidRetrieval",
    "CQADupstackEnglishRetrieval",
    "CQADupstackGamingRetrieval",
    "CQADupstackGisRetrieval",
    "CQADupstackMathematicaRetrieval",
    "CQADupstackPhysicsRetrieval",
    "CQADupstackProgrammersRetrieval",
    "CQADupstackStatsRetrieval",
    "CQADupstackTexRetrieval",
    "CQADupstackUnixRetrieval",
    "CQADupstackWebmastersRetrieval",
    "CQADupstackWordpressRetrieval",
    "DBPedia",
    "FEVER",
    "FiQA2018",
    "NFCorpus",
    "NQ",
    "QuoraRetrieval",
    "SCIDOCS",
    "SciFact",
    "Touche2020",
    "TRECCOVID",
    "ClimateFEVER",
    "HotpotQA",
    "MSMARCO",
]

# Task names that get_task_type_en reports as "STS". "SummEval" is listed
# here too, but get_task_type_en special-cases it as "Summarization".
TASK_LIST_STS = [
    "BIOSSES",
    "SICK-R",
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "STS17",
    "STS22",
    "STSBenchmark",
    "SummEval",
]

# All per-type task lists concatenated, in this fixed order: classification,
# clustering, pair classification, reranking, STS, retrieval.
MTEB_TASK_LIST = (
    TASK_LIST_CLASSIFICATION
    + TASK_LIST_CLUSTERING
    + TASK_LIST_PAIR_CLASSIFICATION
    + TASK_LIST_RERANKING
    + TASK_LIST_STS
    + TASK_LIST_RETRIEVAL
)


def get_task_type_en(task_name: str) -> str:
    """Return the task-type label for a task name from the TASK_LIST_* lists.

    Args:
        task_name: Exact (case-sensitive) task name, e.g. ``"STS12"``.

    Returns:
        One of ``"Summarization"``, ``"Classification"``, ``"Clustering"``,
        ``"PairClassification"``, ``"Reranking"``, ``"STS"`` or ``"Retrieval"``.

    Raises:
        ValueError: If ``task_name`` is in none of the TASK_LIST_* lists.
            Names that appear only in CMTEB_TASK_LIST (e.g. ``"TNews"``) are
            therefore rejected as well.
    """
    # "SummEval" is stored in TASK_LIST_STS, so it has to be singled out
    # before the membership checks below.
    if task_name == "SummEval":
        return "Summarization"

    # Checked in order; the first list containing the name decides the type.
    names_by_type = (
        ("Classification", TASK_LIST_CLASSIFICATION),
        ("Clustering", TASK_LIST_CLUSTERING),
        ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION),
        ("Reranking", TASK_LIST_RERANKING),
        ("STS", TASK_LIST_STS),
        ("Retrieval", TASK_LIST_RETRIEVAL),
    )
    for task_type, task_names in names_by_type:
        if task_name in task_names:
            return task_type

    raise ValueError(f"unknown task name:{task_name}")


# Task types whose instruction does not depend on the task name.
_TYPE_LEVEL_INSTRUCTIONS: Dict[str, str] = {
    'STS': "Retrieve semantically similar text.",
    'Summarization': "Given a news summary, retrieve other semantically similar summaries",
    'BitextMining': "Retrieve parallel sentences.",
}

_CLASSIFICATION_INSTRUCTIONS: Dict[str, str] = {
    'AmazonCounterfactualClassification': 'Classify a given Amazon customer review text as either counterfactual or not-counterfactual',
    'AmazonPolarityClassification': 'Classify Amazon reviews into positive or negative sentiment',
    'AmazonReviewsClassification': 'Classify the given Amazon review into its appropriate rating category',
    'Banking77Classification': 'Given a online banking query, find the corresponding intents',
    'EmotionClassification': 'Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise',
    'ImdbClassification': 'Classify the sentiment expressed in the given movie review text from the IMDB dataset',
    'MassiveIntentClassification': 'Given a user utterance as query, find the user intents',
    'MassiveScenarioClassification': 'Given a user utterance as query, find the user scenarios',
    'MTOPDomainClassification': 'Classify the intent domain of the given utterance in task-oriented conversation',
    'MTOPIntentClassification': 'Classify the intent of the given utterance in task-oriented conversation',
    'ToxicConversationsClassification': 'Classify the given comments as either toxic or not toxic',
    'TweetSentimentExtractionClassification': 'Classify the sentiment of a given tweet as either positive, negative, or neutral',

    'TNews': 'Classify the fine-grained category of the given news title',
    'IFlyTek': 'Given an App description text, find the appropriate fine-grained category',
    'MultilingualSentiment': 'Classify sentiment of the customer review into positive, neutral, or negative',
    'JDReview': 'Classify the customer review for iPhone on e-commerce platform into positive or negative',
    'OnlineShopping': 'Classify the customer review for online shopping into positive or negative',
    'Waimai': 'Classify the customer review from a food takeaway platform into positive or negative',
}

_CLUSTERING_INSTRUCTIONS: Dict[str, str] = {
    'ArxivClusteringP2P': 'Identify the main and secondary category of Arxiv papers based on the titles and abstracts',
    'ArxivClusteringS2S': 'Identify the main and secondary category of Arxiv papers based on the titles',
    'BiorxivClusteringP2P': 'Identify the main category of Biorxiv papers based on the titles and abstracts',
    'BiorxivClusteringS2S': 'Identify the main category of Biorxiv papers based on the titles',
    'MedrxivClusteringP2P': 'Identify the main category of Medrxiv papers based on the titles and abstracts',
    'MedrxivClusteringS2S': 'Identify the main category of Medrxiv papers based on the titles',
    'RedditClustering': 'Identify the topic or theme of Reddit posts based on the titles',
    'RedditClusteringP2P': 'Identify the topic or theme of Reddit posts based on the titles and posts',
    'StackExchangeClustering': 'Identify the topic or theme of StackExchange posts based on the titles',
    'StackExchangeClusteringP2P': 'Identify the topic or theme of StackExchange posts based on the given paragraphs',
    'TwentyNewsgroupsClustering': 'Identify the topic or theme of the given news articles',

    'CLSClusteringS2S': 'Identify the main category of scholar papers based on the titles',
    'CLSClusteringP2P': 'Identify the main category of scholar papers based on the titles and abstracts',
    'ThuNewsClusteringS2S': 'Identify the topic or theme of the given news articles based on the titles',
    'ThuNewsClusteringP2P': 'Identify the topic or theme of the given news articles based on the titles and contents',
}

# Shared by the 'Reranking' and 'PairClassification' task types.
_RERANKING_PAIR_INSTRUCTIONS: Dict[str, str] = {
    'AskUbuntuDupQuestions': 'Retrieve duplicate questions from AskUbuntu forum',
    'MindSmallReranking': 'Retrieve relevant news articles based on user browsing history',
    'SciDocsRR': 'Given a title of a scientific paper, retrieve the titles of other relevant papers',
    'StackOverflowDupQuestions': 'Retrieve duplicate questions from StackOverflow forum',
    'SprintDuplicateQuestions': 'Retrieve duplicate questions from Sprint forum',
    'TwitterSemEval2015': 'Retrieve tweets that are semantically similar to the given tweet',
    'TwitterURLCorpus': 'Retrieve tweets that are semantically similar to the given tweet',

    'T2Reranking': 'Given a Chinese search query, retrieve web passages that answer the question',
    'MMarcoReranking': 'Given a Chinese search query, retrieve web passages that answer the question',
    # CMTEB_TASK_LIST spells this task 'MmarcoReranking'; accept that spelling
    # too so every listed name resolves instead of raising KeyError.
    'MmarcoReranking': 'Given a Chinese search query, retrieve web passages that answer the question',
    'CMedQAv1': 'Given a Chinese community medical question, retrieve replies that best answer the question',
    'CMedQAv2': 'Given a Chinese community medical question, retrieve replies that best answer the question',
    'Ocnli': 'Retrieve semantically similar text.',
    'Cmnli': 'Retrieve semantically similar text.',
}

# Used for every Retrieval task whose lowercased name starts with 'cqadupstack'.
_CQADUPSTACK_INSTRUCTION = 'Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question'

_RETRIEVAL_INSTRUCTIONS: Dict[str, str] = {
    'ArguAna': 'Given a claim, find documents that refute the claim',
    'ClimateFEVER': 'Given a claim about climate change, retrieve documents that support or refute the claim',
    'DBPedia': 'Given a query, retrieve relevant entity descriptions from DBPedia',
    'FEVER': 'Given a claim, retrieve documents that support or refute the claim',
    'FiQA2018': 'Given a financial question, retrieve user replies that best answer the question',
    'HotpotQA': 'Given a multi-hop question, retrieve documents that can help answer the question',
    'MSMARCO': 'Given a web search query, retrieve relevant passages that answer the query.',
    'NFCorpus': 'Given a question, retrieve relevant documents that best answer the question',
    'NQ': 'Given a question, retrieve Wikipedia passages that answer the question',
    'QuoraRetrieval': 'Given a question, retrieve questions that are semantically equivalent to the given question',
    'SCIDOCS': 'Given a scientific paper title, retrieve paper abstracts that are cited by the given paper',
    'SciFact': 'Given a scientific claim, retrieve documents that support or refute the claim',
    'Touche2020': 'Given a question, retrieve detailed and persuasive arguments that answer the question',
    'TRECCOVID': 'Given a query on COVID-19, retrieve documents that answer the query',

    'T2Retrieval': 'Given a Chinese search query, retrieve web passages that answer the question',
    'MMarcoRetrieval': 'Given a web search query, retrieve relevant passages that answer the query',
    'DuRetrieval': 'Given a Chinese search query, retrieve web passages that answer the question',
    'CovidRetrieval': 'Given a question on COVID-19, retrieve news articles that answer the question',
    'CmedqaRetrieval': 'Given a Chinese community medical question, retrieve replies that best answer the question',
    'EcomRetrieval': 'Given a user query from an e-commerce website, retrieve description sentences of relevant products',
    'MedicalRetrieval': 'Given a medical question, retrieve user replies that best answer the question',
    'VideoRetrieval': 'Given a video search query, retrieve the titles of relevant videos',
}
# Every name above is also accepted in its all-lowercase spelling.
_RETRIEVAL_INSTRUCTIONS.update(
    {name.lower(): text for name, text in _RETRIEVAL_INSTRUCTIONS.items()}
)
# Additional accepted spellings for some tasks, plus the 'miracl' entry.
_RETRIEVAL_INSTRUCTIONS.update({
    'trec-covid': _RETRIEVAL_INSTRUCTIONS['TRECCOVID'],
    'climate-fever': _RETRIEVAL_INSTRUCTIONS['ClimateFEVER'],
    'dbpedia-entity': _RETRIEVAL_INSTRUCTIONS['DBPedia'],
    'webis-touche2020': _RETRIEVAL_INSTRUCTIONS['Touche2020'],
    'fiqa': _RETRIEVAL_INSTRUCTIONS['FiQA2018'],
    'quora': _RETRIEVAL_INSTRUCTIONS['QuoraRetrieval'],
    'miracl': 'Given a question, retrieve Wikipedia passages that answer the question',
})

# Task type -> (task name -> instruction) for types with per-task instructions.
_INSTRUCTIONS_BY_TYPE: Dict[str, Dict[str, str]] = {
    'Classification': _CLASSIFICATION_INSTRUCTIONS,
    'Clustering': _CLUSTERING_INSTRUCTIONS,
    'Reranking': _RERANKING_PAIR_INSTRUCTIONS,
    'PairClassification': _RERANKING_PAIR_INSTRUCTIONS,
    'Retrieval': _RETRIEVAL_INSTRUCTIONS,
}


def get_task_def_by_task_name_and_type(task_name: str, task_type: str) -> str:
    """Return the natural-language instruction for a task.

    'STS', 'Summarization' and 'BitextMining' each map to one fixed
    instruction regardless of ``task_name``. For 'Classification',
    'Clustering', 'Reranking', 'PairClassification' and 'Retrieval' the
    instruction is looked up by the exact ``task_name``; Retrieval
    additionally accepts lowercase names and a few alternative spellings
    (e.g. ``'trec-covid'``), and maps any name starting with 'cqadupstack'
    (case-insensitive) to one shared instruction.

    Args:
        task_name: Name of the task, e.g. ``'Banking77Classification'``.
        task_type: Task-type label, e.g. ``'Classification'``.

    Returns:
        The instruction text for the task.

    Raises:
        ValueError: If ``task_type`` is not one of the supported types.
        KeyError: If ``task_type`` is supported but has no entry for
            ``task_name``.
    """
    if task_type in _TYPE_LEVEL_INSTRUCTIONS:
        return _TYPE_LEVEL_INSTRUCTIONS[task_type]

    # All CQADupstack sub-tasks share one instruction; match on the prefix.
    if task_type == 'Retrieval' and task_name.lower().startswith('cqadupstack'):
        return _CQADUPSTACK_INSTRUCTION

    instructions = _INSTRUCTIONS_BY_TYPE.get(task_type)
    if instructions is None:
        raise ValueError(f"No instruction config for task {task_name} with type {task_type}")

    # Deliberately a plain lookup: an unknown name surfaces as KeyError.
    return instructions[task_name]


def get_detailed_instruct(task_description: str) -> str:
    """Wrap a task description in the instruction/query prompt prefix.

    Args:
        task_description: Instruction text; may be empty.

    Returns:
        ``'Instruct: <task_description>\\nQuery: '``, or an empty string when
        ``task_description`` is empty (or otherwise falsy), so that no prefix
        is produced.
    """
    if not task_description:
        return ''

    return f'Instruct: {task_description}\nQuery: '


if __name__ == "__main__":
    # When run as a script, print how many task names MTEB_TASK_LIST contains.
    print(len(MTEB_TASK_LIST))