Clémentine
committed on
Commit
•
f04f90e
1
Parent(s):
4b67a33
better checkboxes, better filtering
Browse files- app.py +19 -40
- src/scripts/update_all_request_files.py +5 -26
- src/submission/check_validity.py +33 -4
- src/submission/submit.py +3 -25
app.py
CHANGED
@@ -99,13 +99,10 @@ def update_table(
|
|
99 |
type_query: list,
|
100 |
precision_query: str,
|
101 |
size_query: list,
|
102 |
-
|
103 |
-
show_merges: bool,
|
104 |
-
show_moe: bool,
|
105 |
-
show_flagged: bool,
|
106 |
query: str,
|
107 |
):
|
108 |
-
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query,
|
109 |
filtered_df = filter_queries(query, filtered_df)
|
110 |
df = select_columns(filtered_df, columns)
|
111 |
return df
|
@@ -153,21 +150,21 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
|
|
153 |
|
154 |
|
155 |
def filter_models(
|
156 |
-
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list,
|
157 |
) -> pd.DataFrame:
|
158 |
# Show all models
|
159 |
-
if
|
160 |
-
filtered_df = df
|
161 |
-
else: # Show only still on the hub models
|
162 |
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
|
|
|
|
163 |
|
164 |
-
if
|
165 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
|
166 |
|
167 |
-
if
|
168 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
|
169 |
|
170 |
-
if
|
171 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
|
172 |
|
173 |
type_emoji = [t[0] for t in type_query]
|
@@ -186,10 +183,7 @@ leaderboard_df = filter_models(
|
|
186 |
type_query=[t.to_str(" : ") for t in ModelType],
|
187 |
size_query=list(NUMERIC_INTERVALS.keys()),
|
188 |
precision_query=[i.value.name for i in Precision],
|
189 |
-
|
190 |
-
show_merges=False,
|
191 |
-
show_moe=True,
|
192 |
-
show_flagged=False
|
193 |
)
|
194 |
|
195 |
demo = gr.Blocks(css=custom_css)
|
@@ -224,17 +218,11 @@ with demo:
|
|
224 |
interactive=True,
|
225 |
)
|
226 |
with gr.Row():
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
)
|
233 |
-
moe_models_visibility = gr.Checkbox(
|
234 |
-
value=True, label="Show MoE", interactive=True
|
235 |
-
)
|
236 |
-
flagged_models_visibility = gr.Checkbox(
|
237 |
-
value=False, label="Show flagged models", interactive=True
|
238 |
)
|
239 |
with gr.Column(min_width=320):
|
240 |
#with gr.Box(elem_id="box-filter"):
|
@@ -289,10 +277,7 @@ with demo:
|
|
289 |
filter_columns_type,
|
290 |
filter_columns_precision,
|
291 |
filter_columns_size,
|
292 |
-
|
293 |
-
merged_models_visibility,
|
294 |
-
moe_models_visibility,
|
295 |
-
flagged_models_visibility,
|
296 |
search_bar,
|
297 |
],
|
298 |
leaderboard_table,
|
@@ -308,10 +293,7 @@ with demo:
|
|
308 |
filter_columns_type,
|
309 |
filter_columns_precision,
|
310 |
filter_columns_size,
|
311 |
-
|
312 |
-
merged_models_visibility,
|
313 |
-
moe_models_visibility,
|
314 |
-
flagged_models_visibility,
|
315 |
search_bar,
|
316 |
],
|
317 |
leaderboard_table,
|
@@ -319,7 +301,7 @@ with demo:
|
|
319 |
# Check query parameter once at startup and update search bar + hidden component
|
320 |
demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
|
321 |
|
322 |
-
for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size,
|
323 |
selector.change(
|
324 |
update_table,
|
325 |
[
|
@@ -328,10 +310,7 @@ with demo:
|
|
328 |
filter_columns_type,
|
329 |
filter_columns_precision,
|
330 |
filter_columns_size,
|
331 |
-
|
332 |
-
merged_models_visibility,
|
333 |
-
moe_models_visibility,
|
334 |
-
flagged_models_visibility,
|
335 |
search_bar,
|
336 |
],
|
337 |
leaderboard_table,
|
|
|
99 |
type_query: list,
|
100 |
precision_query: str,
|
101 |
size_query: list,
|
102 |
+
hide_models: list,
|
|
|
|
|
|
|
103 |
query: str,
|
104 |
):
|
105 |
+
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, hide_models)
|
106 |
filtered_df = filter_queries(query, filtered_df)
|
107 |
df = select_columns(filtered_df, columns)
|
108 |
return df
|
|
|
150 |
|
151 |
|
152 |
def filter_models(
|
153 |
+
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
|
154 |
) -> pd.DataFrame:
|
155 |
# Show all models
|
156 |
+
if "Private or deleted" in hide_models:
|
|
|
|
|
157 |
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
158 |
+
else:
|
159 |
+
filtered_df = df
|
160 |
|
161 |
+
if "Merges and moerges" in hide_models:
|
162 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
|
163 |
|
164 |
+
if "MoE" in hide_models:
|
165 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
|
166 |
|
167 |
+
if "Flagged" in hide_models:
|
168 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
|
169 |
|
170 |
type_emoji = [t[0] for t in type_query]
|
|
|
183 |
type_query=[t.to_str(" : ") for t in ModelType],
|
184 |
size_query=list(NUMERIC_INTERVALS.keys()),
|
185 |
precision_query=[i.value.name for i in Precision],
|
186 |
+
hide_models=[True, True, True, False], # Deleted, merges, flagged, MoEs
|
|
|
|
|
|
|
187 |
)
|
188 |
|
189 |
demo = gr.Blocks(css=custom_css)
|
|
|
218 |
interactive=True,
|
219 |
)
|
220 |
with gr.Row():
|
221 |
+
hide_models = gr.CheckboxGroup(
|
222 |
+
label="Hide models",
|
223 |
+
choices = ["Private or deleted", "Merges and moerges", "Flagged", "MoE"],
|
224 |
+
value=["Private or deleted", "Merges and moerges", "Flagged"],
|
225 |
+
interactive=True
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
)
|
227 |
with gr.Column(min_width=320):
|
228 |
#with gr.Box(elem_id="box-filter"):
|
|
|
277 |
filter_columns_type,
|
278 |
filter_columns_precision,
|
279 |
filter_columns_size,
|
280 |
+
hide_models,
|
|
|
|
|
|
|
281 |
search_bar,
|
282 |
],
|
283 |
leaderboard_table,
|
|
|
293 |
filter_columns_type,
|
294 |
filter_columns_precision,
|
295 |
filter_columns_size,
|
296 |
+
hide_models,
|
|
|
|
|
|
|
297 |
search_bar,
|
298 |
],
|
299 |
leaderboard_table,
|
|
|
301 |
# Check query parameter once at startup and update search bar + hidden component
|
302 |
demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
|
303 |
|
304 |
+
for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models]:
|
305 |
selector.change(
|
306 |
update_table,
|
307 |
[
|
|
|
310 |
filter_columns_type,
|
311 |
filter_columns_precision,
|
312 |
filter_columns_size,
|
313 |
+
hide_models,
|
|
|
|
|
|
|
314 |
search_bar,
|
315 |
],
|
316 |
leaderboard_table,
|
src/scripts/update_all_request_files.py
CHANGED
@@ -3,7 +3,7 @@ from huggingface_hub import ModelCard
|
|
3 |
|
4 |
import json
|
5 |
import time
|
6 |
-
from src.submission.check_validity import is_model_on_hub, check_model_card
|
7 |
from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
|
8 |
|
9 |
def update_models(file_path, models):
|
@@ -35,37 +35,16 @@ def update_models(file_path, models):
|
|
35 |
# If the model doesn't have a model card or a license, we consider it's deleted
|
36 |
if still_on_hub:
|
37 |
try:
|
38 |
-
|
|
|
39 |
still_on_hub = False
|
40 |
except Exception:
|
|
|
41 |
still_on_hub = False
|
42 |
data['still_on_hub'] = still_on_hub
|
43 |
|
44 |
-
# Check if the model is a merge
|
45 |
-
is_merge_from_metadata = False
|
46 |
-
is_moe_from_metadata = False
|
47 |
if still_on_hub:
|
48 |
-
|
49 |
-
|
50 |
-
# Storing the model metadata
|
51 |
-
tags = []
|
52 |
-
if model_card.data.tags:
|
53 |
-
is_merge_from_metadata = "merge" in model_card.data.tags
|
54 |
-
is_moe_from_metadata = "moe" in model_card.data.tags
|
55 |
-
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
56 |
-
# If the model is a merge but not saying it in the metadata, we flag it
|
57 |
-
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
58 |
-
if is_merge_from_model_card or is_merge_from_metadata:
|
59 |
-
tags.append("merge")
|
60 |
-
if not is_merge_from_metadata:
|
61 |
-
tags.append("flagged:undisclosed_merge")
|
62 |
-
moe_keywords = ["moe", "mixture of experts", "mixtral"]
|
63 |
-
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
|
64 |
-
is_moe_from_name = "moe" in model_id.lower().replace("/", "-").replace("_", "-").split("-")
|
65 |
-
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
66 |
-
tags.append("moe")
|
67 |
-
if not is_moe_from_metadata:
|
68 |
-
tags.append("flagged:undisclosed_moe")
|
69 |
|
70 |
data["tags"] = tags
|
71 |
|
|
|
3 |
|
4 |
import json
|
5 |
import time
|
6 |
+
from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags
|
7 |
from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
|
8 |
|
9 |
def update_models(file_path, models):
|
|
|
35 |
# If the model doesn't have a model card or a license, we consider it's deleted
|
36 |
if still_on_hub:
|
37 |
try:
|
38 |
+
status, msg, model_card = check_model_card(model_id)
|
39 |
+
if status is False:
|
40 |
still_on_hub = False
|
41 |
except Exception:
|
42 |
+
model_card = None
|
43 |
still_on_hub = False
|
44 |
data['still_on_hub'] = still_on_hub
|
45 |
|
|
|
|
|
|
|
46 |
if still_on_hub:
|
47 |
+
tags = get_model_tags(model_card, model_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
data["tags"] = tags
|
50 |
|
src/submission/check_validity.py
CHANGED
@@ -19,7 +19,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
19 |
try:
|
20 |
card = ModelCard.load(repo_id)
|
21 |
except huggingface_hub.utils.EntryNotFoundError:
|
22 |
-
return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
|
23 |
|
24 |
# Enforce license metadata
|
25 |
if card.data.license is None:
|
@@ -27,13 +27,13 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
27 |
return False, (
|
28 |
"License not found. Please add a license to your model card using the `license` metadata or a"
|
29 |
" `license_name`/`license_link` pair."
|
30 |
-
)
|
31 |
|
32 |
# Enforce card content
|
33 |
if len(card.text) < 200:
|
34 |
-
return False, "Please add a description to your model card, it is too short."
|
35 |
|
36 |
-
return True, ""
|
37 |
|
38 |
|
39 |
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
|
@@ -133,3 +133,32 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
|
|
133 |
users_to_submission_dates[organisation].append(info["submitted_time"])
|
134 |
|
135 |
return set(file_names), users_to_submission_dates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
try:
|
20 |
card = ModelCard.load(repo_id)
|
21 |
except huggingface_hub.utils.EntryNotFoundError:
|
22 |
+
return False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
|
23 |
|
24 |
# Enforce license metadata
|
25 |
if card.data.license is None:
|
|
|
27 |
return False, (
|
28 |
"License not found. Please add a license to your model card using the `license` metadata or a"
|
29 |
" `license_name`/`license_link` pair."
|
30 |
+
), None
|
31 |
|
32 |
# Enforce card content
|
33 |
if len(card.text) < 200:
|
34 |
+
return False, "Please add a description to your model card, it is too short.", None
|
35 |
|
36 |
+
return True, "", card
|
37 |
|
38 |
|
39 |
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
|
|
|
133 |
users_to_submission_dates[organisation].append(info["submitted_time"])
|
134 |
|
135 |
return set(file_names), users_to_submission_dates
|
136 |
+
|
137 |
+
def get_model_tags(model_card, model: str) -> list[str]:
    """Derive the leaderboard tags for a model from its model card and repo name.

    Args:
        model_card: Object exposing ``data.tags`` (the card metadata tags, may be
            empty or ``None``) and ``text`` (the card body), or ``None`` when no
            card is available.
        model: Repository identifier of the model (e.g. ``"org/name"``).

    Returns:
        A list that may contain, in this order: ``"merge"``,
        ``"flagged:undisclosed_merge"`` and ``"moe"``. The list is empty when
        ``model_card`` is ``None`` or nothing matches.
    """
    tags = []
    if model_card is None:
        return tags

    # Metadata tags can be missing/empty; treat that as "no tags declared".
    declared_tags = model_card.data.tags or []
    is_merge_from_metadata = "merge" in declared_tags
    is_moe_from_metadata = "moe" in declared_tags

    # Lowercase the card body once; every keyword check below reuses it.
    card_text = model_card.text.lower()

    merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
    is_merge_from_model_card = any(keyword in card_text for keyword in merge_keywords)
    if is_merge_from_model_card or is_merge_from_metadata:
        tags.append("merge")
        # If the model is a merge but not saying it in the metadata, we flag it
        if not is_merge_from_metadata:
            tags.append("flagged:undisclosed_merge")

    moe_keywords = ["moe", "mixture of experts", "mixtral"]
    is_moe_from_model_card = any(keyword in card_text for keyword in moe_keywords)
    # The name only counts when "moe" is a whole token of the repo id
    # (split on "/", "_" and "-"), not a substring of a longer word.
    name_tokens = model.lower().replace("/", "-").replace("_", "-").split("-")
    is_moe_from_name = "moe" in name_tokens
    if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
        # We no longer tag undisclosed MoEs, so only the plain tag is added.
        tags.append("moe")

    return tags
|
src/submission/submit.py
CHANGED
@@ -13,6 +13,7 @@ from src.submission.check_validity import (
|
|
13 |
get_model_size,
|
14 |
is_model_on_hub,
|
15 |
user_submission_permission,
|
|
|
16 |
)
|
17 |
|
18 |
REQUESTED_MODELS = None
|
@@ -96,34 +97,11 @@ def add_new_eval(
|
|
96 |
except Exception:
|
97 |
return styled_error("Please select a license for your model")
|
98 |
|
99 |
-
modelcard_OK, error_msg = check_model_card(model)
|
100 |
if not modelcard_OK:
|
101 |
return styled_error(error_msg)
|
102 |
|
103 |
-
|
104 |
-
is_moe_from_metadata = False
|
105 |
-
model_card = ModelCard.load(model)
|
106 |
-
|
107 |
-
# Storing the model tags
|
108 |
-
tags = []
|
109 |
-
if model_card.data.tags:
|
110 |
-
is_merge_from_metadata = "merge" in model_card.data.tags
|
111 |
-
is_moe_from_metadata = "moe" in model_card.data.tags
|
112 |
-
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
113 |
-
# If the model is a merge but not saying it in the metadata, we flag it
|
114 |
-
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
115 |
-
if is_merge_from_model_card or is_merge_from_metadata:
|
116 |
-
tags.append("merge")
|
117 |
-
if not is_merge_from_metadata:
|
118 |
-
tags.append("flagged:undisclosed_merge")
|
119 |
-
moe_keywords = ["moe", "mixture of experts", "mixtral"]
|
120 |
-
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
|
121 |
-
is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
|
122 |
-
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
123 |
-
tags.append("moe")
|
124 |
-
if not is_moe_from_metadata:
|
125 |
-
tags.append("flagged:undisclosed_moe")
|
126 |
-
|
127 |
|
128 |
# Seems good, creating the eval
|
129 |
print("Adding new eval")
|
|
|
13 |
get_model_size,
|
14 |
is_model_on_hub,
|
15 |
user_submission_permission,
|
16 |
+
get_model_tags
|
17 |
)
|
18 |
|
19 |
REQUESTED_MODELS = None
|
|
|
97 |
except Exception:
|
98 |
return styled_error("Please select a license for your model")
|
99 |
|
100 |
+
modelcard_OK, error_msg, model_card = check_model_card(model)
|
101 |
if not modelcard_OK:
|
102 |
return styled_error(error_msg)
|
103 |
|
104 |
+
tags = get_model_tags(model_card, model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
# Seems good, creating the eval
|
107 |
print("Adding new eval")
|