albertvillanova HF staff committed on
Commit
15c8167
·
verified ·
1 Parent(s): f5976e2

Create src.results module

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. src/results.py +104 -0
app.py CHANGED
@@ -4,8 +4,9 @@ import gradio as gr
4
  import pandas as pd
5
  from huggingface_hub import HfFileSystem
6
 
7
- from src.constants import DETAILS_DATASET_ID, DETAILS_FILENAME, RESULTS_DATASET_ID, SUBTASKS, TASKS
8
-
 
9
 
10
  fs = HfFileSystem()
11
 
 
4
  import pandas as pd
5
  from huggingface_hub import HfFileSystem
6
 
7
+ from src.constants import DETAILS_DATASET_ID, DETAILS_FILENAME, SUBTASKS, TASKS
8
+ from src.results import fetch_result_paths, filter_latest_result_path_per_model, update_load_results_component, \
9
+ load_results_dataframes, display_results, update_tasks_component, clear_results
10
 
11
  fs = HfFileSystem()
12
 
src/results.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+
6
+ from app import fs, latest_result_path_per_model
7
+ from src.constants import RESULTS_DATASET_ID, TASKS
8
+
9
+
10
def fetch_result_paths():
    """List the JSON result files found under the results dataset.

    Returns:
        Whatever ``fs.glob`` yields for the pattern
        ``{RESULTS_DATASET_ID}/**/**/*.json``.
    """
    pattern = f"{RESULTS_DATASET_ID}/**/**/*.json"
    return fs.glob(pattern)
13
+
14
+
15
def filter_latest_result_path_per_model(paths):
    """Keep a single result path per model: the greatest one by string order.

    The model id of a path is what remains after cutting off the leading
    ``RESULTS_DATASET_ID`` plus one separator character and the final
    ``/``-separated component (the file name).

    Args:
        paths: Iterable of result file paths (strings).

    Returns:
        Dict mapping each model id to ``max()`` of its paths.
        Presumably file names sort chronologically, so the maximum is the
        most recent result -- verify against the dataset layout.
    """
    prefix_length = len(RESULTS_DATASET_ID) + 1
    paths_by_model = {}
    for path in paths:
        model_id, _filename = path[prefix_length:].rsplit("/", 1)
        paths_by_model.setdefault(model_id, []).append(path)
    return {
        model_id: max(candidates)
        for model_id, candidates in paths_by_model.items()
    }
23
+
24
+
25
def get_result_path_from_model(model_id, result_path_per_model):
    """Look up the result path stored for ``model_id``.

    Args:
        model_id: Key to look up.
        result_path_per_model: Mapping from model id to result path.

    Returns:
        The value stored under ``model_id``. A missing key propagates the
        mapping's own lookup error (``KeyError`` for a plain dict).
    """
    result_path = result_path_per_model[model_id]
    return result_path
27
+
28
+
29
def update_load_results_component():
    """Build the "Load Results" button in its interactive (enabled) state."""
    button = gr.Button("Load Results", interactive=True)
    return button
31
+
32
+
33
def load_data(result_path) -> dict:
    """Read and parse the JSON file at ``result_path``.

    The file is opened in text mode through the module-level filesystem
    ``fs`` and decoded with ``json.load``.

    Args:
        result_path: Path of the JSON result file to read.

    Returns:
        The parsed JSON content. The caller in this module treats it as a
        dict (it uses ``.get`` and ``.items``), so the annotation is ``dict``
        rather than the previous, incorrect ``pd.DataFrame``.
    """
    with fs.open(result_path, "r") as f:
        return json.load(f)
37
+
38
+
39
def load_results_dataframe(model_id):
    """Load a model's result file as a one-row DataFrame.

    Args:
        model_id: Key into ``latest_result_path_per_model``. A falsy value
            (``None``, ``""``) short-circuits and yields ``None``.

    Returns:
        ``None`` for a falsy ``model_id``; otherwise a single-row DataFrame
        whose nested JSON keys are flattened by ``pd.json_normalize`` and
        whose first column, ``"index"``, holds the file's ``"model_name"``
        value (``"Model"`` when that key is absent).
    """
    if not model_id:
        return None

    # NOTE(review): ``latest_result_path_per_model`` is bound by a
    # ``from app import ...`` at import time; confirm it is populated by then.
    result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
    data = load_data(result_path)
    model_name = data.get("model_name", "Model")

    record = dict(data.items())
    frame = pd.json_normalize([record])
    # NOTE: column names are left as flat strings; splitting them with
    # ``.str.split(".")`` would give lists instead of tuples.
    labelled = frame.set_index(pd.Index([model_name]))
    return labelled.reset_index()
48
+
49
+
50
def load_results_dataframes(*model_ids):
    """Call ``load_results_dataframe`` on every given model id.

    Args:
        *model_ids: Model ids, in the order results should be returned.

    Returns:
        List with one entry per model id, each being whatever
        ``load_results_dataframe`` returned for it.
    """
    dataframes = []
    for model_id in model_ids:
        dataframes.append(load_results_dataframe(model_id))
    return dataframes
52
+
53
+
54
def display_results(task, *dfs):
    """Render the "results" and "configs" HTML tables for the given frames.

    Args:
        task: Task name used to filter rows, or ``"All"``; passed through to
            ``display_tab``.
        *dfs: DataFrames to combine. Entries that are ``None`` or lack an
            ``"index"`` column are skipped.

    Returns:
        A ``(results_html, configs_html)`` pair, or ``(None, None)`` when no
        usable frame was supplied.
    """
    # ``load_results_dataframe`` returns None for an empty model id, so guard
    # against None before touching ``.columns``.
    indexed = [
        df.set_index("index")
        for df in dfs
        if df is not None and "index" in df.columns
    ]
    if not indexed:
        return None, None
    # Transpose so each source frame's row becomes a column; drop the
    # columns-axis name.
    combined = pd.concat(indexed).T.rename_axis(columns=None)
    return display_tab("results", combined, task), display_tab("configs", combined, task)
61
+
62
+
63
+ def display_tab(tab, df, task):
64
+ df = df.style.format(na_rep="")
65
+ df.hide(
66
+ [
67
+ row
68
+ for row in df.index
69
+ if (
70
+ not row.startswith(f"{tab}.")
71
+ or row.startswith(f"{tab}.leaderboard.")
72
+ or row.endswith(".alias")
73
+ or (not row.startswith(f"{tab}.{task}") if task != "All" else False)
74
+ )
75
+ ],
76
+ axis="index",
77
+ )
78
+ start = len(f"{tab}.leaderboard_") if task == "All" else len(f"{tab}.{task} ")
79
+ df.format_index(lambda idx: idx[start:].removesuffix(",none"), axis="index")
80
+ return df.to_html()
81
+
82
+
83
def update_tasks_component():
    """Build the "Tasks" radio selector in its interactive state.

    Choices are ``"All"`` followed by the values of ``TASKS``; ``"All"`` is
    selected.
    """
    choices = ["All", *TASKS.values()]
    radio = gr.Radio(
        choices,
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        interactive=True,
    )
    return radio
91
+
92
+
93
def clear_results():
    """Return reset values for the comparison widgets.

    Returns:
        A 5-tuple in the order model_id_1, model_id_2, dataframe_1,
        dataframe_2, task: four ``None`` values followed by a non-interactive
        "Tasks" radio with ``"All"`` selected.
    """
    choices = ["All", *TASKS.values()]
    disabled_tasks = gr.Radio(
        choices,
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        interactive=False,
    )
    return (None, None, None, None, disabled_tasks)