# File: vidore-leaderboard/data/dataset_handler.py
# Page metadata captured with this source (not part of the module):
#   commit "new_benchmark_2 (#7)", ebac224 (verified), QuentinJG
# Keyword substrings for the ViDoRe datasets. Each entry is one of the
# substrings that get_datasets_nickname() looks for inside a dataset name.
VIDORE_DATASETS_KEYWORDS = [
    "arxivqa",
    "docvqa",
    "infovqa",
    "tabfquad",
    "tatdqa",
    "shift",
    "artificial_intelligence",
    "energy",
    "government_reports",
    "healthcare_industry",
]
# Keyword substrings for the second set of ViDoRe datasets. Each entry is one
# of the substrings that get_datasets_nickname() looks for in a dataset name.
VIDORE_2_DATASETS_KEYWORDS = [
    "restaurant_esg",
    "rse_restaurant",
    "axa",
    "mit_biomedical",
    "economics_macro",
]
# Ordered lookup rules: (keyword, nickname, multilingual nickname or None).
# Order is significant: the first keyword found in the dataset name wins, so
# rows must not be re-sorted. A None third field means the keyword has no
# separate multilingual nickname.
_NICKNAME_RULES = (
    ("arxivqa", "ArxivQA", None),
    ("docvqa", "DocVQA", None),
    ("infovqa", "InfoVQA", None),
    ("tabfquad", "TabFQuad", None),
    ("tatdqa", "TAT-DQA", None),
    ("shift", "Shift Project", None),
    ("artificial_intelligence", "Artificial Intelligence", None),
    ("energy", "Energy", None),
    ("government_reports", "Government Reports", None),
    ("healthcare_industry", "Healthcare Industry", None),
    ("restaurant_esg", "ESG Restaurant Human", None),
    (
        "rse_restaurant",
        "ESG Restaurant Synthetic",
        "ESG Restaurant Synthetic Multilingual",
    ),
    ("axa", "AXA", "AXA Multilingual"),
    ("mit_biomedical", "MIT Biomedical", "MIT Biomedical Multilingual"),
    ("economics_macro", "Economics Macro", "Economics Macro Multilingual"),
)


def get_datasets_nickname(dataset_name: str) -> str:
    """Return the display nickname for a ViDoRe dataset name.

    The name is scanned for known keyword substrings in a fixed order and the
    first keyword found decides the result. Matching is case-sensitive. For
    keywords that define a multilingual variant, that variant is returned
    when the name also contains the substring ``"multilingual"``.

    Args:
        dataset_name: Dataset identifier to look up.

    Returns:
        The nickname associated with the first matching keyword.

    Raises:
        ValueError: If no known keyword occurs in ``dataset_name``.
    """
    for keyword, nickname, multilingual_nickname in _NICKNAME_RULES:
        if keyword not in dataset_name:
            continue
        # Only some keywords distinguish a multilingual variant; for the
        # others the "multilingual" marker is ignored.
        if multilingual_nickname is not None and "multilingual" in dataset_name:
            return multilingual_nickname
        return nickname
    raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")