# leaderboard/src/populate.py
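"""Populate helpers for the leaderboard app: build the main results
DataFrame and the finished/running/pending evaluation-queue DataFrames."""
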
import json
import os

import pandas as pd

import src.display.formatting as formatting
import src.display.utils as utils
import src.leaderboard.read_evals as read_evals


def get_leaderboard_df(
    results_path: str, requests_path: str, cols: list, benchmark_cols: list
) -> tuple[list, pd.DataFrame]:
    """Read the raw eval results and build the sorted, filtered leaderboard DataFrame."""
    raw_data = read_evals.get_raw_eval_results(results_path, requests_path)
    all_data_json = [v.to_dict() for v in raw_data]

    df = pd.DataFrame.from_records(all_data_json)
    df = df.sort_values(by=[utils.AutoEvalColumn.accuracy.name], ascending=False)
    df = df[cols].round(decimals=2)

    # Filter out models for which any of the benchmarks has not been produced yet.
    df = df[formatting.has_no_nan_values(df, benchmark_cols)]
    return raw_data, df


def get_evaluation_queue_df(
    save_path: str, cols: list
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Read every eval request file under `save_path` and split the queue into
    finished, running, and pending DataFrames."""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        if ".json" in entry:
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)

            data[utils.EvalQueueColumn.model.name] = formatting.make_clickable_model(data["model"])
            data[utils.EvalQueueColumn.revision.name] = data.get("revision", "main")
            all_evals.append(data)
        elif ".md" not in entry:
            # This is a folder: collect the request files nested inside it.
            sub_entries = [e for e in os.listdir(os.path.join(save_path, entry)) if not e.startswith(".")]
            for sub_entry in sub_entries:
                file_path = os.path.join(save_path, entry, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)

                data[utils.EvalQueueColumn.model.name] = formatting.make_clickable_model(data["model"])
                data[utils.EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)

    # Bucket the requests by evaluation status.
    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]

    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]
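

# Usage sketch (illustrative, not part of this module): how an app entry point
# would typically wire these helpers up. The constant names EVAL_RESULTS_PATH,
# EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, and EVAL_COLS are assumptions
# borrowed from the common leaderboard-template layout, not defined here.
#
#   from src.populate import get_leaderboard_df, get_evaluation_queue_df
#
#   raw_data, leaderboard_df = get_leaderboard_df(
#       EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS
#   )
#   finished_df, running_df, pending_df = get_evaluation_queue_df(
#       EVAL_REQUESTS_PATH, EVAL_COLS
#   )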