Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
1489ff1
1
Parent(s):
a03f0fa
debugging the codebase
Browse files
- app.py +0 -1
- pyproject.toml +2 -2
- requirements.txt +2 -1
- src/leaderboard/filter_models.py +0 -2
- src/submission/check_validity.py +0 -1
- src/tools/plots.py +7 -2
app.py
CHANGED
@@ -141,7 +141,6 @@ def load_and_create_plots():
|
|
141 |
plot_df = create_plot_df(create_scores_df(raw_data))
|
142 |
return plot_df
|
143 |
|
144 |
-
print(leaderboard_df.columns)
|
145 |
|
146 |
demo = gr.Blocks(css=custom_css)
|
147 |
with demo:
|
|
|
141 |
plot_df = create_plot_df(create_scores_df(raw_data))
|
142 |
return plot_df
|
143 |
|
|
|
144 |
|
145 |
demo = gr.Blocks(css=custom_css)
|
146 |
with demo:
|
pyproject.toml
CHANGED
@@ -44,10 +44,10 @@ tqdm = "4.65.0"
|
|
44 |
transformers = "4.40.0"
|
45 |
tokenizers = ">=0.15.0"
|
46 |
gradio-space-ci = {git = "https://huggingface.co/spaces/Wauplin/gradio-space-ci", rev = "0.2.3"}
|
47 |
-
gradio = "4.
|
48 |
isort = "^5.13.2"
|
49 |
ruff = "^0.3.5"
|
50 |
-
gradio-leaderboard = "
|
51 |
|
52 |
[build-system]
|
53 |
requires = ["poetry-core"]
|
|
|
44 |
transformers = "4.40.0"
|
45 |
tokenizers = ">=0.15.0"
|
46 |
gradio-space-ci = {git = "https://huggingface.co/spaces/Wauplin/gradio-space-ci", rev = "0.2.3"}
|
47 |
+
gradio = " 4.20.0"
|
48 |
isort = "^5.13.2"
|
49 |
ruff = "^0.3.5"
|
50 |
+
gradio-leaderboard = "0.0.7"
|
51 |
|
52 |
[build-system]
|
53 |
requires = ["poetry-core"]
|
requirements.txt
CHANGED
@@ -14,4 +14,5 @@ tqdm==4.65.0
|
|
14 |
transformers==4.40.0
|
15 |
tokenizers>=0.15.0
|
16 |
gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/gradio-space-ci@0.2.3 # CI !!!
|
17 |
-
|
|
|
|
14 |
transformers==4.40.0
|
15 |
tokenizers>=0.15.0
|
16 |
gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/gradio-space-ci@0.2.3 # CI !!!
|
17 |
+
gradio==4.20.0
|
18 |
+
gradio_leaderboard==0.0.7
|
src/leaderboard/filter_models.py
CHANGED
@@ -139,8 +139,6 @@ def flag_models(leaderboard_data: list[dict]):
|
|
139 |
else:
|
140 |
# Merges and moes are flagged
|
141 |
flag_key = "merged"
|
142 |
-
|
143 |
-
print(f"model check: {flag_key}")
|
144 |
|
145 |
# Reverse the logic: Check for non-flagged models instead
|
146 |
if flag_key in FLAGGED_MODELS:
|
|
|
139 |
else:
|
140 |
# Merges and moes are flagged
|
141 |
flag_key = "merged"
|
|
|
|
|
142 |
|
143 |
# Reverse the logic: Check for non-flagged models instead
|
144 |
if flag_key in FLAGGED_MODELS:
|
src/submission/check_validity.py
CHANGED
@@ -170,7 +170,6 @@ def get_model_tags(model_card, model: str):
|
|
170 |
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
|
171 |
# Hardcoding because of gating problem
|
172 |
if "Qwen/Qwen1.5-32B" in model:
|
173 |
-
print("HERE NSHJNKJSNJLAS")
|
174 |
is_moe_from_model_card = False
|
175 |
is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
|
176 |
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
|
|
170 |
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
|
171 |
# Hardcoding because of gating problem
|
172 |
if "Qwen/Qwen1.5-32B" in model:
|
|
|
173 |
is_moe_from_model_card = False
|
174 |
is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
|
175 |
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
src/tools/plots.py
CHANGED
@@ -16,8 +16,11 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
|
|
16 |
:param results_df: A DataFrame containing result information including metric scores and dates.
|
17 |
:return: A new DataFrame containing the maximum scores until each date for every metric.
|
18 |
"""
|
|
|
|
|
19 |
# Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it
|
20 |
results_df = pd.DataFrame(raw_data)
|
|
|
21 |
# results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True)
|
22 |
results_df.sort_values(by="date", inplace=True)
|
23 |
|
@@ -34,7 +37,7 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
|
|
34 |
# We ignore models that are flagged/no longer on the hub/not finished
|
35 |
to_ignore = (
|
36 |
not row["still_on_hub"]
|
37 |
-
or row["not_flagged"]
|
38 |
or current_model in FLAGGED_MODELS
|
39 |
or row["status"] != "FINISHED"
|
40 |
)
|
@@ -68,7 +71,6 @@ def create_plot_df(scores_df: dict[str : pd.DataFrame]) -> pd.DataFrame:
|
|
68 |
"""
|
69 |
# Initialize the list to store DataFrames
|
70 |
dfs = []
|
71 |
-
|
72 |
# Iterate over the cols and create a new DataFrame for each column
|
73 |
for col in BENCHMARK_COLS + [AutoEvalColumn.average.name]:
|
74 |
d = scores_df[col].reset_index(drop=True)
|
@@ -77,6 +79,9 @@ def create_plot_df(scores_df: dict[str : pd.DataFrame]) -> pd.DataFrame:
|
|
77 |
|
78 |
# Concatenate all the created DataFrames
|
79 |
concat_df = pd.concat(dfs, ignore_index=True)
|
|
|
|
|
|
|
80 |
|
81 |
# Sort values by 'date'
|
82 |
concat_df.sort_values(by="date", inplace=True)
|
|
|
16 |
:param results_df: A DataFrame containing result information including metric scores and dates.
|
17 |
:return: A new DataFrame containing the maximum scores until each date for every metric.
|
18 |
"""
|
19 |
+
print(raw_data[0])
|
20 |
+
print(raw_data[0].date)
|
21 |
# Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it
|
22 |
results_df = pd.DataFrame(raw_data)
|
23 |
+
print(results_df.columns)
|
24 |
# results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True)
|
25 |
results_df.sort_values(by="date", inplace=True)
|
26 |
|
|
|
37 |
# We ignore models that are flagged/no longer on the hub/not finished
|
38 |
to_ignore = (
|
39 |
not row["still_on_hub"]
|
40 |
+
or not row["not_flagged"]
|
41 |
or current_model in FLAGGED_MODELS
|
42 |
or row["status"] != "FINISHED"
|
43 |
)
|
|
|
71 |
"""
|
72 |
# Initialize the list to store DataFrames
|
73 |
dfs = []
|
|
|
74 |
# Iterate over the cols and create a new DataFrame for each column
|
75 |
for col in BENCHMARK_COLS + [AutoEvalColumn.average.name]:
|
76 |
d = scores_df[col].reset_index(drop=True)
|
|
|
79 |
|
80 |
# Concatenate all the created DataFrames
|
81 |
concat_df = pd.concat(dfs, ignore_index=True)
|
82 |
+
# print("Columns in DataFrame:", concat_df.columns)
|
83 |
+
# if "date" not in concat_df.columns:
|
84 |
+
# raise ValueError("Date column missing from DataFrame. Cannot proceed with sorting.")
|
85 |
|
86 |
# Sort values by 'date'
|
87 |
concat_df.sort_values(by="date", inplace=True)
|