Tristan Thrush committed · Commit bb28608 · 1 parent: fe77dfe
added selection of verified results
app.py CHANGED
@@ -10,9 +10,12 @@ from os.path import exists
 import threading
 
 
-def get_model_ids():
+def get_model_ids(author=None):
     api = HfApi()
-    models = api.list_models(filter="model-index")
+    if author is None:
+        models = api.list_models(filter="model-index")
+    else:
+        models = api.list_models(filter="model-index", author="autoevaluate")
     model_ids = [x.modelId for x in models]
     return model_ids
 
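Since the only call site passes author="autoevaluate", the hardcoded string in the else branch behaves identically to passing the argument through. For reference, a minimal pass-through sketch (not the committed code), assuming huggingface_hub's HfApi.list_models with its optional author parameter:

from huggingface_hub import HfApi

def get_model_ids(author=None):
    # list_models accepts an optional author; author=None returns models
    # from all authors, so the if/else above collapses into a single call.
    api = HfApi()
    models = api.list_models(filter="model-index", author=author)
    return [x.modelId for x in models]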
@@ -42,24 +45,39 @@ def parse_metric_value(value):
     return value
 
 
-def parse_metrics_rows(meta):
+def parse_metrics_rows(meta, from_autoeval=False):
     if not isinstance(meta["model-index"], list) or len(meta["model-index"]) == 0 or "results" not in meta["model-index"][0]:
         return None
     for result in meta["model-index"][0]["results"]:
         if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
             continue
         dataset = result["dataset"]["type"]
-        row = {"dataset": dataset, "split": None, "config": None}
-        if "split" in result["dataset"]:
-            row["split"] = result["dataset"]["split"]
+        row = {"dataset": dataset, "split": None, "config": None, "verified": from_autoeval}
+        if "split" in result["dataset"]:
+            row["split"] = result["dataset"]["split"]
+        if "config" in result["dataset"]:
+            row["config"] = result["dataset"]["config"]
         for metric in result["metrics"]:
             type = metric["type"].lower().strip()
+            if type in ("dataset", "split", "config", "verified"):
+                # Metrics are not allowed to be named "dataset", "split", "config", or "verified".
+                continue
             value = parse_metric_value(metric.get("value", None))
             if value is None:
                 continue
-            if type not in row or value > row[type]:
-                # overwrite the metric if the new value is higher.
-                row[type] = value
+            if type in row:
+                new_metric_better = value < row[type] if type in ascending_metrics else value > row[type]
+            if type not in row or new_metric_better:
+                # overwrite the metric if the new value is better.
+
+                if from_autoeval:
+                    # if the metric is from autoeval, only include it in the leaderboard if
+                    # it is a verified metric. Unverified metrics are already included
+                    # in the leaderboard from the unverified model card.
+                    if "verified" in metric and metric["verified"]:
+                        row[type] = value
+                else:
+                    row[type] = value
         yield row
 
 @st.cache(ttl=3600)
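To make the new behavior concrete, here is a sketch of what the parser yields for a hypothetical model-index entry, assuming ascending_metrics (defined earlier in app.py, outside this diff) does not contain "accuracy", and that parse_metric_value passes numeric values through unchanged:

meta = {
    "model-index": [{
        "results": [{
            "dataset": {"type": "glue", "config": "sst2", "split": "validation"},
            "metrics": [
                {"type": "accuracy", "value": 0.91, "verified": True},
                {"type": "accuracy", "value": 0.89},  # worse and unverified: ignored
            ],
        }]
    }]
}

for row in parse_metrics_rows(meta, from_autoeval=True):
    print(row)
# {'dataset': 'glue', 'split': 'validation', 'config': 'sst2',
#  'verified': True, 'accuracy': 0.91}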
@@ -68,11 +86,12 @@ def get_data_wrapper():
     def get_data():
         data = []
         model_ids = get_model_ids()
+        model_ids_from_autoeval = set(get_model_ids(author="autoevaluate"))
         for model_id in tqdm(model_ids):
            meta = get_metadata(model_id)
             if meta is None:
                 continue
-            for row in parse_metrics_rows(meta):
+            for row in parse_metrics_rows(meta, from_autoeval=model_id in model_ids_from_autoeval):
                 if row is None:
                     continue
                 row["model_id"] = model_id
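Collecting the autoevaluate ids into a set makes the per-model from_autoeval check a constant-time hash lookup rather than a linear scan over the list. A small illustration with hypothetical model ids:

# Membership tests against a set are O(1) per lookup, so the check inside
# the tqdm loop stays cheap no matter how many models autoevaluate hosts.
model_ids_from_autoeval = {"autoevaluate/sst2-demo", "autoevaluate/squad-demo"}
print("autoevaluate/sst2-demo" in model_ids_from_autoeval)  # True
print("someuser/bert-base" in model_ids_from_autoeval)      # False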
@@ -108,6 +127,10 @@ if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
         default_dataset = query_params["dataset"][0]
 
+only_verified_results = st.sidebar.checkbox(
+    "Filter for Verified Results",
+)
+
 dataset = st.sidebar.selectbox(
     "Dataset",
     selectable_datasets,
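st.sidebar.checkbox returns the checkbox's current boolean state, and its value argument defaults to False, so the leaderboard keeps showing unverified results until the user opts in. Streamlit reruns the whole script on every interaction, so toggling the box re-executes the dataframe filtering further down. A minimal sketch of that pattern:

import streamlit as st

# The script reruns top-to-bottom on each click; the checkbox state is
# re-read here and everything below recomputes with the new value.
only_verified_results = st.sidebar.checkbox("Filter for Verified Results")
st.write("Verified results only" if only_verified_results else "All results")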
@@ -118,15 +141,19 @@ st.experimental_set_query_params(**{"dataset": [dataset]})
 dataset_df = dataframe[dataframe.dataset == dataset]
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 
+if only_verified_results:
+    dataset_df = dataset_df[dataset_df["verified"]]
+
 selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset"), dataset_df.columns))
+
+dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
+dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
+
 sorting_metric = st.sidebar.radio(
     "Sorting Metric",
     selectable_metrics,
 )
 
-dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
-dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
-
 st.markdown(
     "Please click on the model's name to be redirected to its model card."
 )
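The verified filter and the pruning step are standard pandas idioms: a boolean mask selects rows where the "verified" column is True, and dropna(thresh=2) keeps only rows with at least two non-NA values. A self-contained toy example (hypothetical model ids):

import pandas as pd

df = pd.DataFrame({
    "model_id": ["a/model-1", "b/model-2", "c/model-3"],
    "accuracy": [0.91, None, 0.88],
    "verified": [True, False, False],
})

# Boolean-mask filtering, as in the only_verified_results branch.
print(df[df["verified"]])  # keeps only a/model-1

# filter() selects columns; dropna(thresh=2) then drops b/model-2,
# whose only non-NA value is its model_id.
print(df.filter(["model_id", "accuracy"]).dropna(thresh=2))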
|