# src/leaderboard_formatting.py
from typing import List
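
# Human-readable display names for the raw metric and metadata column keys.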
COLUMNS_PRETTY = {
"bleu": "BLEU",
"chrf": "ChrF",
"rouge1": "ROUGE-1",
"rouge2": "ROUGE-2",
"rougeL": "ROUGE-L",
"bertscore": "BERTScore",
"bertscore_normalized": "BERTScore (Normalized)",
"model_name": "Model Name",
"model_availability": "Availability",
"urls": "Resources",
"context_size": "Context Size",
"submitted_by": "Submitted By",
"EM infile": "EM infile",
"EM inproject": "EM inproject",
"EM common": "EM common",
"EM commited": "EM committed",
"EM non_informative": "EM non-informative",
"EM random": "EM random",
"EM all": "EM all",
"dataset": "Dataset",
}
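
# Metrics (by display name) reported on the leaderboard for each task.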
METRICS_PER_TASK = {
"commit_message_generation": [
"BLEU",
"ChrF",
"ROUGE-1",
"ROUGE-2",
"ROUGE-L",
"BERTScore",
"BERTScore (Normalized)",
],
"project_code_completion": [
"EM infile",
"EM inproject",
"EM common",
"EM committed",
"EM non-informative",
"EM random",
"EM all",
]
}
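
# Column the leaderboard table is sorted by for each task.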
SORT_COLUMN_PER_TASK = {
"commit_message_generation": "ROUGE-1",
"project_code_completion": "Dataset"
}


def get_columns_per_task(task_id: str) -> List[str]:
    """Return the ordered list of display columns for the given task's table."""
    metrics_per_task = METRICS_PER_TASK[task_id]
    if task_id == "project_code_completion":
        return ["Model Name", "Availability", "Context Size", "Dataset"] + metrics_per_task + ["Submitted By", "Resources"]
    return ["Model Name", "Context Size"] + metrics_per_task + ["Availability", "Submitted By", "Resources"]


def get_types_per_task(task_id: str) -> List[str]:
    """Return the datatype ("html", "markdown" or "number") for each column from get_columns_per_task."""
    # The fallback tuple is only used for its length: an unknown task still yields five "number" metric columns.
    metrics_per_task = METRICS_PER_TASK.get(task_id, (0, 0, 0, 0, 0))
    if task_id == "project_code_completion":
        return ["html", "markdown", "markdown", "html"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]
    return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]