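"""Helpers for loading and formatting the Long Code Arena leaderboard tables.

Results are read from the Hugging Face dataset named by the DATASET_ID environment variable.
"""
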
import logging
import os
import pandas as pd # type: ignore[import]
from datasets import get_dataset_config_names, load_dataset # type: ignore[import]
from .leaderboard_formatting import (
COLUMNS_PRETTY,
METRICS_PER_TASK,
SORT_COLUMN_PER_TASK,
get_columns_per_task,
)
from .tasks_content import TASKS_PRETTY_REVERSE

# Tasks that have a results config in the dataset referenced by the DATASET_ID environment variable.
AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])


def _get_results_stub() -> pd.DataFrame:
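    """Return a placeholder leaderboard filled with "X" metric values for tasks without published results."""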
stub_df = pd.DataFrame(
[
{
"Model Name": "GPT-4",
"Availability": "Proprietary",
"Context Size": "16k",
"BLEU": "X",
"ROUGE": "X",
"ChrF": "X",
"BERTScore": "X",
"BERTScore (Normalized)": "X",
"Submitted By": "π Long Code Arena Team",
},
{
"Model Name": "CodeLlama-7b (instruct)",
"Availability": "Llama 2 license",
"Context Size": "16k",
"BLEU": "X",
"ROUGE": "X",
"ChrF": "X",
"BERTScore": "X",
"BERTScore (Normalized)": "X",
"Submitted By": "π Long Code Arena Team",
},
]
)
return stub_df


def _get_results_dataset(task_id: str) -> pd.DataFrame:
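    """Load the results for ``task_id`` from the dataset, prettify the columns, sort, and format the metrics."""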
results_df = load_dataset(os.environ["DATASET_ID"], task_id, split="test").to_pandas()
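    # Rename raw dataset columns to their human-readable leaderboard names; ignore columns missing from the mapping.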
results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
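    # Render context sizes of at least 1000 as, e.g., "16k"; smaller values are kept as-is.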
results_df["Context Size"] = results_df["Context Size"].map(lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x)
results_df = results_df.sort_values(by=SORT_COLUMN_PER_TASK[task_id], ascending=False)
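    # Format metric values: five decimal places for BERTScore columns, two for everything else.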
for metric_column in METRICS_PER_TASK[task_id]:
if "BERTScore" in metric_column:
results_df[metric_column] = results_df[metric_column].map(lambda x: f"{x:.5f}")
else:
results_df[metric_column] = results_df[metric_column].map(lambda x: f"{x:.2f}")
results_df = results_df[get_columns_per_task(task_id)]
return results_df


def get_results_for_task(task_pretty: str) -> pd.DataFrame:
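    """Resolve a task's human-readable name to its id and return its leaderboard, or a stub if no results exist."""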
task_id = TASKS_PRETTY_REVERSE[task_pretty]
if task_id in AVAILABLE_TASKS:
logging.info(f"Retrieving results for {task_pretty}...")
return _get_results_dataset(task_id)
logging.info(f"Generating leaderboard stub for {task_pretty}...")
return _get_results_stub()