File size: 3,486 Bytes
9203553 04f40cd 9203553 cdf268e 6c92442 9203553 2d26479 6c92442 04f40cd e2473e2 2d26479 dc0e67a 2d26479 9ea9349 9203553 a9273cf 9203553 e2473e2 04f40cd e2473e2 04f40cd e2473e2 9203553 04f40cd 9203553 04f40cd 9203553 6c92442 aa8b23d 6c92442 aa8b23d dc801c4 6c92442 dc801c4 6c92442 aa8b23d 2d26479 fcf16bd 04f40cd 9203553 2d0af54 9203553 2d0af54 9203553 2d0af54 9203553 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import logging
import os
import re
import pandas as pd # type: ignore[import]
from datasets import get_dataset_config_names, load_dataset # type: ignore[import]
from .formatting import model_hyperlink
from .leaderboard_formatting import (
COLUMNS_PRETTY,
METRICS_PER_TASK,
SORT_COLUMN_PER_TASK,
get_columns_per_task,
)
from .tasks_content import TASKS_PRETTY_REVERSE
from .utils import MD_LINK_PATTERN
# Task configurations published in the results dataset, resolved once at import time.
# DATASET_ID is read with os.environ[...], so a missing variable raises KeyError here.
try:
    AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
except FileNotFoundError:
    # Best-effort fallback: with an empty list no task counts as available,
    # so get_results_for_task() serves the stub leaderboard instead.
    AVAILABLE_TASKS = []
    logging.warning("Dataset is not available! Check if token is expired.")
def _get_results_stub() -> pd.DataFrame:
stub_df = pd.DataFrame(
[
{
"Model Name": "GPT-4",
"Availability": "Proprietary",
"Context Size": "16k",
"BLEU": "X",
"ROUGE": "X",
"ChrF": "X",
"BERTScore": "X",
"BERTScore (Normalized)": "X",
"Submitted By": "π Long Code Arena Team",
"Resources": "",
},
{
"Model Name": "CodeLlama-7b (instruct)",
"Availability": "Llama 2 license",
"Context Size": "16k",
"BLEU": "X",
"ROUGE": "X",
"ChrF": "X",
"BERTScore": "X",
"BERTScore (Normalized)": "X",
"Submitted By": "π Long Code Arena Team",
"Resources": "",
},
]
)
return stub_df
def _process_urls(raw_urls: str) -> str:
if not raw_urls:
return raw_urls
html_urls = [model_hyperlink(*re.search(MD_LINK_PATTERN, url.strip()).groups()) for url in raw_urls.split(",")]
return ", ".join(html_urls)
def _get_results_dataset(task_id: str) -> pd.DataFrame:
    """Load the results of ``task_id`` and format them for display.

    The ``test`` split of the ``task_id`` configuration is loaded from the
    dataset named by the ``DATASET_ID`` environment variable, columns are
    renamed via ``COLUMNS_PRETTY``, rows are sorted in descending order by the
    task's sort column, metrics are rendered as fixed-precision strings, and
    model names / resource links are turned into hyperlinks.

    Args:
        task_id: Dataset configuration name of the task.

    Returns:
        A DataFrame restricted to the columns returned by
        ``get_columns_per_task(task_id)``.
    """

    def _shorten_context_size(raw_size):
        # Sizes of 1000 and above become e.g. "16k"; smaller ones pass through as-is.
        size = int(raw_size)
        return f"{size // 1000}k" if size >= 1000 else raw_size

    # force_redownload bypasses the local cache on every call
    # (presumably so freshly published results show up -- confirm).
    dataset = load_dataset(
        os.environ["DATASET_ID"],
        task_id,
        split="test",
        download_mode="force_redownload",
    )
    table = dataset.to_pandas().rename(columns=COLUMNS_PRETTY, errors="ignore")
    table["Context Size"] = table["Context Size"].map(_shorten_context_size)

    # Sorting happens before the metric columns are converted to strings.
    table = table.sort_values(by=SORT_COLUMN_PER_TASK[task_id], ascending=False)

    for metric in METRICS_PER_TASK[task_id]:
        # BERTScore-style metrics keep five decimals, all others two.
        spec = ".5f" if "BERTScore" in metric else ".2f"
        table[metric] = table[metric].map(lambda score: format(score, spec))

    # Link the model name only when a URL is present.
    name_url_pairs = zip(table["Model Name"], table["model_url"])
    table["Model Name"] = [
        model_hyperlink(link=url, model_name=name) if url else name
        for name, url in name_url_pairs
    ]

    # Only the project code completion task has a "Dataset" links column.
    url_columns = ["Resources"]
    if task_id == "project_code_completion":
        url_columns.insert(0, "Dataset")
    for column in url_columns:
        table[column] = [_process_urls(cell) for cell in table[column]]

    return table[get_columns_per_task(task_id)]
def get_results_for_task(task_pretty: str) -> pd.DataFrame:
    """Return the leaderboard table for a task given its display name.

    Args:
        task_pretty: Human-readable task name; it is looked up in
            ``TASKS_PRETTY_REVERSE``, so an unknown name raises ``KeyError``.

    Returns:
        The formatted results when the task is listed in ``AVAILABLE_TASKS``,
        otherwise the placeholder table from ``_get_results_stub``.
    """
    task_id = TASKS_PRETTY_REVERSE[task_pretty]

    # Guard clause: tasks without published results get the stub table.
    if task_id not in AVAILABLE_TASKS:
        logging.info(f"Generating leaderboard stub for {task_pretty}...")
        return _get_results_stub()

    logging.info(f"Retrieving results for {task_pretty}...")
    return _get_results_dataset(task_id)
|