Spaces:
Running
Running
Create src.results module
Browse files- app.py +3 -2
- src/results.py +104 -0
app.py
CHANGED
@@ -4,8 +4,9 @@ import gradio as gr
|
|
4 |
import pandas as pd
|
5 |
from huggingface_hub import HfFileSystem
|
6 |
|
7 |
-
from src.constants import DETAILS_DATASET_ID, DETAILS_FILENAME,
|
8 |
-
|
|
|
9 |
|
10 |
fs = HfFileSystem()
|
11 |
|
|
|
4 |
import pandas as pd
|
5 |
from huggingface_hub import HfFileSystem
|
6 |
|
7 |
+
from src.constants import DETAILS_DATASET_ID, DETAILS_FILENAME, SUBTASKS, TASKS
|
8 |
+
from src.results import fetch_result_paths, filter_latest_result_path_per_model, update_load_results_component, \
|
9 |
+
load_results_dataframes, display_results, update_tasks_component, clear_results
|
10 |
|
11 |
fs = HfFileSystem()
|
12 |
|
src/results.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
from app import fs, latest_result_path_per_model
|
7 |
+
from src.constants import RESULTS_DATASET_ID, TASKS
|
8 |
+
|
9 |
+
|
10 |
+
def fetch_result_paths():
    """Return the paths of all JSON result files under the results dataset.

    The lookup is a glob over ``RESULTS_DATASET_ID`` performed through the
    module-level ``fs`` filesystem object.
    """
    pattern = f"{RESULTS_DATASET_ID}/**/**/*.json"
    return fs.glob(pattern)
|
13 |
+
|
14 |
+
|
15 |
+
def filter_latest_result_path_per_model(paths):
    """Keep, for every model, only its greatest result path.

    The model id is whatever sits between the ``RESULTS_DATASET_ID`` prefix
    and the final ``/``-separated component (the file name) of each path.

    Args:
        paths: Iterable of result file paths, each starting with
            ``RESULTS_DATASET_ID`` followed by ``/``.

    Returns:
        Dict mapping model id to the lexicographically greatest path seen
        for that model, in order of first appearance.

    Raises:
        ValueError: If a path has no ``/`` after the dataset prefix.
    """
    # +1 skips the "/" that follows the dataset id.
    prefix_length = len(RESULTS_DATASET_ID) + 1
    latest = {}
    for path in paths:
        model_id, _ = path[prefix_length:].rsplit("/", 1)
        # Plain string comparison decides what "latest" means.
        # NOTE(review): presumably file names embed a sortable timestamp -- confirm.
        if model_id not in latest or path > latest[model_id]:
            latest[model_id] = path
    return latest
|
23 |
+
|
24 |
+
|
25 |
+
def get_result_path_from_model(model_id: str, result_path_per_model: dict[str, str]) -> str:
    """Return the result path registered for ``model_id``.

    Args:
        model_id: Key to look up.
        result_path_per_model: Mapping of model id to result file path.

    Returns:
        The path stored under ``model_id``.

    Raises:
        KeyError: If ``model_id`` is not present in the mapping.
    """
    return result_path_per_model[model_id]
|
27 |
+
|
28 |
+
|
29 |
+
def update_load_results_component():
    """Return a "Load Results" button with interaction enabled."""
    button = gr.Button("Load Results", interactive=True)
    return button
|
31 |
+
|
32 |
+
|
33 |
+
def load_data(result_path) -> dict:
    """Read and parse the JSON file at ``result_path``.

    The file is opened in text mode through the module-level ``fs``
    filesystem object.

    Args:
        result_path: Path of the JSON result file to read.

    Returns:
        The parsed JSON content. The caller in this module treats it as a
        dict (it calls ``.get`` on it), hence the annotation; no DataFrame
        is built here.
    """
    with fs.open(result_path, "r") as f:
        return json.load(f)
|
37 |
+
|
38 |
+
|
39 |
+
def load_results_dataframe(model_id):
    """Load the latest results of ``model_id`` as a one-row DataFrame.

    The result JSON is flattened with ``pd.json_normalize`` and the model's
    display name (``"model_name"`` in the JSON, ``"Model"`` if absent) is
    exposed in an ``"index"`` column.

    Args:
        model_id: Key into ``latest_result_path_per_model``. A falsy value
            (``None``, ``""``) means no model is selected.

    Returns:
        The one-row DataFrame, or ``None`` when ``model_id`` is falsy.
    """
    if not model_id:
        return None
    # NOTE(review): ``latest_result_path_per_model`` is imported from ``app``
    # by name, so it is the object bound at import time -- confirm ``app``
    # never rebinds it.
    result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
    data = load_data(result_path)
    model_name = data.get("model_name", "Model")
    # Column names stay as dotted strings (json_normalize's default
    # separator); splitting them would yield lists rather than tuples.
    df = pd.json_normalize([data])
    # Label the single row with the model name, then move that label into a
    # regular "index" column.
    return df.set_index(pd.Index([model_name])).reset_index()
|
48 |
+
|
49 |
+
|
50 |
+
def load_results_dataframes(*model_ids):
    """Load one results DataFrame per given model id, in the same order.

    Each element is whatever ``load_results_dataframe`` returns for the
    corresponding id.
    """
    return list(map(load_results_dataframe, model_ids))
|
52 |
+
|
53 |
+
|
54 |
+
def display_results(task, *dfs):
    """Render the "results" and "configs" comparison tables as HTML.

    Args:
        task: Task used to filter rows, or ``"All"`` for no task filter.
        *dfs: Per-model DataFrames carrying an ``"index"`` column. Entries
            that are ``None`` or lack that column are ignored.

    Returns:
        A ``(results_html, configs_html)`` tuple, or ``(None, None)`` when
        no usable DataFrame was given.
    """
    # ``None`` stands for "no model selected", so it is skipped instead of
    # crashing on ``.columns``.
    frames = [
        df.set_index("index")
        for df in dfs
        if df is not None and "index" in df.columns
    ]
    if not frames:
        return None, None
    # Stack the per-model rows, then transpose so each model is a column and
    # each flattened key is a row.
    combined = pd.concat(frames).T.rename_axis(columns=None)
    return display_tab("results", combined, task), display_tab("configs", combined, task)
|
61 |
+
|
62 |
+
|
63 |
+
def display_tab(tab, df, task):
|
64 |
+
df = df.style.format(na_rep="")
|
65 |
+
df.hide(
|
66 |
+
[
|
67 |
+
row
|
68 |
+
for row in df.index
|
69 |
+
if (
|
70 |
+
not row.startswith(f"{tab}.")
|
71 |
+
or row.startswith(f"{tab}.leaderboard.")
|
72 |
+
or row.endswith(".alias")
|
73 |
+
or (not row.startswith(f"{tab}.{task}") if task != "All" else False)
|
74 |
+
)
|
75 |
+
],
|
76 |
+
axis="index",
|
77 |
+
)
|
78 |
+
start = len(f"{tab}.leaderboard_") if task == "All" else len(f"{tab}.{task} ")
|
79 |
+
df.format_index(lambda idx: idx[start:].removesuffix(",none"), axis="index")
|
80 |
+
return df.to_html()
|
81 |
+
|
82 |
+
|
83 |
+
def update_tasks_component():
    """Return the "Tasks" radio selector with interaction enabled.

    The choices are ``"All"`` followed by the values of ``TASKS``, and
    ``"All"`` is preselected.
    """
    choices = ["All"] + list(TASKS.values())
    return gr.Radio(
        choices,
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        interactive=True,
    )
|
91 |
+
|
92 |
+
|
93 |
+
def clear_results():
    """Return the values that reset the comparison UI.

    The tuple holds four ``None`` values followed by a "Tasks" radio
    selector set back to ``"All"`` with interaction disabled.
    """
    disabled_tasks = gr.Radio(
        ["All"] + list(TASKS.values()),
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        interactive=False,
    )
    # model_id_1, model_id_2, dataframe_1, dataframe_2, task
    return None, None, None, None, disabled_tasks
|