Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,752 Bytes
8c49cb6 8aaf0e7 8c49cb6 439afd4 1dbfacb 9839977 b1a1395 8c49cb6 8aaf0e7 0c7ef71 b1a1395 1dbfacb 9839977 8c49cb6 b1a1395 8c49cb6 ec3a730 8c49cb6 8aaf0e7 b1a1395 8c49cb6 8aaf0e7 8c49cb6 439afd4 8c49cb6 439afd4 8c49cb6 8aaf0e7 8c49cb6 8aaf0e7 ebb5810 8aaf0e7 8c49cb6 ebb5810 8aaf0e7 ebb5810 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import json
import os
import copy
import pandas as pd
from src.display.formatting import has_no_nan_values, make_requests_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row, proprietary_rows
from src.leaderboard.filter_models import filter_models_flags
from src.leaderboard.read_evals import get_raw_eval_results
def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str, cols: list, benchmark_cols: list, show_incomplete=False) -> tuple[list, pd.DataFrame]:
    """Build the leaderboard table from the raw evaluation results.

    Args:
        results_path: Forwarded to ``get_raw_eval_results``.
        requests_path: Forwarded to ``get_raw_eval_results``.
        dynamic_path: Forwarded to ``get_raw_eval_results``.
        cols: Columns kept in the returned frame, in this order.
        benchmark_cols: Columns checked by ``has_no_nan_values`` when
            incomplete rows are filtered out.
        show_incomplete: When false (the default), rows rejected by
            ``has_no_nan_values(df, benchmark_cols)`` are dropped.

    Returns:
        A ``(raw_data, df)`` pair: the object returned by
        ``get_raw_eval_results`` and the leaderboard frame sorted by the
        average column in descending order, restricted to ``cols`` and
        rounded to two decimals.
    """
    # NOTE(review): annotated as a list because it is iterated below;
    # confirm against get_raw_eval_results.
    raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)

    # Evaluated models first, then the baseline row, then proprietary rows.
    all_data_json = [v.to_dict() for v in raw_data]
    all_data_json.append(baseline_row)
    all_data_json.extend(proprietary_rows)
    # Return value is not used, so this presumably flags records in place
    # -- confirm against filter_models_flags.
    filter_models_flags(all_data_json)

    df = pd.DataFrame.from_records(all_data_json)
    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
    df = df[cols].round(decimals=2)

    # Filter out rows for which any of the benchmarks has not been produced.
    if not show_incomplete:
        df = df[has_no_nan_values(df, benchmark_cols)]
    return raw_data, df
# Statuses shown in the "pending" table.
_PENDING_STATUSES = ("PENDING", "RERUN", "PENDING_NEW_EVAL")
# Sort ranks for pending requests (lower sorts first); unknown values rank last.
_SOURCE_PRIORITY = {"manual": 0, "leaderboard": 1, "script": 2}
_STATUS_PRIORITY = {"PENDING": 2, "RERUN": 0, "PENDING_NEW_EVAL": 1}
_UNKNOWN_PRIORITY = 3
# Columns needed to order the pending table even when not displayed.
_PENDING_SORT_COLUMNS = ("source", "status", "submitted_time")


def _load_eval_request(save_path: str, relative_path: str) -> dict:
    """Load one request JSON file and fill in its model and revision columns."""
    file_path = os.path.join(save_path, relative_path)
    with open(file_path, encoding="utf-8") as fp:
        data = json.load(fp)
    data[EvalQueueColumn.model.name] = make_requests_clickable_model(data["model"], relative_path)
    data[EvalQueueColumn.revision.name] = data.get("revision", "main")
    return data


def get_evaluation_queue_df(save_path: str, cols: list, show_incomplete=False) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Read the evaluation request files and split them by status.

    Request files are looked up directly under ``save_path`` (entries whose
    name contains ``.json``) and one level down inside its sub-folders.
    Hidden entries (leading ``.``) and entries whose name contains ``.md``
    are ignored.

    Args:
        save_path: Directory holding the request files.
        cols: Columns of the returned frames, in this order.
        show_incomplete: When true, requests with status
            ``PENDING_NEW_EVAL`` are also listed in the finished frame
            (they remain in the pending frame as well).

    Returns:
        ``(finished, running, pending, failed)`` data frames, each restricted
        to ``cols``. The pending frame is ordered by source priority, then
        status priority, then ``submitted_time``.

    Raises:
        KeyError: If a request file has no ``"model"`` or ``"status"`` key.
    """
    all_evals = []
    for entry in os.listdir(save_path):
        if entry.startswith("."):
            continue
        if ".json" in entry:
            all_evals.append(_load_eval_request(save_path, entry))
        elif ".md" not in entry and os.path.isdir(os.path.join(save_path, entry)):
            # A folder of request files. Stray regular files are skipped
            # instead of crashing os.listdir.
            for sub_entry in os.listdir(os.path.join(save_path, entry)):
                if not sub_entry.startswith("."):
                    all_evals.append(_load_eval_request(save_path, os.path.join(entry, sub_entry)))

    finished_statuses = ("FINISHED", "PENDING_NEW_EVAL") if show_incomplete else ("FINISHED",)
    pending_list = [e for e in all_evals if e["status"] in _PENDING_STATUSES]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"] in finished_statuses]
    failed_list = [e for e in all_evals if e["status"] == "FAILED"]

    # The pending table needs the sort columns; add only the missing ones so
    # a caller that already displays them does not get duplicate columns.
    cols_pending = list(cols)
    cols_pending.extend(c for c in _PENDING_SORT_COLUMNS if c not in cols_pending)

    df_pending = pd.DataFrame.from_records(pending_list, columns=cols_pending)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    df_failed = pd.DataFrame.from_records(failed_list, columns=cols)

    df_pending["source_priority"] = df_pending["source"].apply(
        lambda source: _SOURCE_PRIORITY.get(source, _UNKNOWN_PRIORITY)
    )
    df_pending["status_priority"] = df_pending["status"].apply(
        lambda status: _STATUS_PRIORITY.get(status, _UNKNOWN_PRIORITY)
    )
    df_pending = df_pending.sort_values(["source_priority", "status_priority", "submitted_time"])

    # Selecting `cols` discards the helper columns added above.
    return df_finished[cols], df_running[cols], df_pending[cols], df_failed[cols]
|