Alina Lozovskaia committed on
Commit
1489ff1
·
1 Parent(s): a03f0fa

debugging the codebase

Browse files
app.py CHANGED
@@ -141,7 +141,6 @@ def load_and_create_plots():
141
  plot_df = create_plot_df(create_scores_df(raw_data))
142
  return plot_df
143
 
144
- print(leaderboard_df.columns)
145
 
146
  demo = gr.Blocks(css=custom_css)
147
  with demo:
 
141
  plot_df = create_plot_df(create_scores_df(raw_data))
142
  return plot_df
143
 
 
144
 
145
  demo = gr.Blocks(css=custom_css)
146
  with demo:
pyproject.toml CHANGED
@@ -44,10 +44,10 @@ tqdm = "4.65.0"
44
  transformers = "4.40.0"
45
  tokenizers = ">=0.15.0"
46
  gradio-space-ci = {git = "https://huggingface.co/spaces/Wauplin/gradio-space-ci", rev = "0.2.3"}
47
- gradio = "4.9.0"
48
  isort = "^5.13.2"
49
  ruff = "^0.3.5"
50
- gradio-leaderboard = "^0.0.7"
51
 
52
  [build-system]
53
  requires = ["poetry-core"]
 
44
  transformers = "4.40.0"
45
  tokenizers = ">=0.15.0"
46
  gradio-space-ci = {git = "https://huggingface.co/spaces/Wauplin/gradio-space-ci", rev = "0.2.3"}
47
+ gradio = "4.20.0"
48
  isort = "^5.13.2"
49
  ruff = "^0.3.5"
50
+ gradio-leaderboard = "0.0.7"
51
 
52
  [build-system]
53
  requires = ["poetry-core"]
requirements.txt CHANGED
@@ -14,4 +14,5 @@ tqdm==4.65.0
14
  transformers==4.40.0
15
  tokenizers>=0.15.0
16
  gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/gradio-space-ci@0.2.3 # CI !!!
17
- gradio_leaderboard
 
 
14
  transformers==4.40.0
15
  tokenizers>=0.15.0
16
  gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/gradio-space-ci@0.2.3 # CI !!!
17
+ gradio==4.20.0
18
+ gradio_leaderboard==0.0.7
src/leaderboard/filter_models.py CHANGED
@@ -139,8 +139,6 @@ def flag_models(leaderboard_data: list[dict]):
139
  else:
140
  # Merges and moes are flagged
141
  flag_key = "merged"
142
-
143
- print(f"model check: {flag_key}")
144
 
145
  # Reverse the logic: Check for non-flagged models instead
146
  if flag_key in FLAGGED_MODELS:
 
139
  else:
140
  # Merges and moes are flagged
141
  flag_key = "merged"
 
 
142
 
143
  # Reverse the logic: Check for non-flagged models instead
144
  if flag_key in FLAGGED_MODELS:
src/submission/check_validity.py CHANGED
@@ -170,7 +170,6 @@ def get_model_tags(model_card, model: str):
170
  is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
171
  # Hardcoding because of gating problem
172
  if "Qwen/Qwen1.5-32B" in model:
173
- print("HERE NSHJNKJSNJLAS")
174
  is_moe_from_model_card = False
175
  is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
176
  if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
 
170
  is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
171
  # Hardcoding because of gating problem
172
  if "Qwen/Qwen1.5-32B" in model:
 
173
  is_moe_from_model_card = False
174
  is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
175
  if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
src/tools/plots.py CHANGED
@@ -16,8 +16,11 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
16
  :param results_df: A DataFrame containing result information including metric scores and dates.
17
  :return: A new DataFrame containing the maximum scores until each date for every metric.
18
  """
 
 
19
  # Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it
20
  results_df = pd.DataFrame(raw_data)
 
21
  # results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True)
22
  results_df.sort_values(by="date", inplace=True)
23
 
@@ -34,7 +37,7 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
34
  # We ignore models that are flagged/no longer on the hub/not finished
35
  to_ignore = (
36
  not row["still_on_hub"]
37
- or row["not_flagged"]
38
  or current_model in FLAGGED_MODELS
39
  or row["status"] != "FINISHED"
40
  )
@@ -68,7 +71,6 @@ def create_plot_df(scores_df: dict[str : pd.DataFrame]) -> pd.DataFrame:
68
  """
69
  # Initialize the list to store DataFrames
70
  dfs = []
71
-
72
  # Iterate over the cols and create a new DataFrame for each column
73
  for col in BENCHMARK_COLS + [AutoEvalColumn.average.name]:
74
  d = scores_df[col].reset_index(drop=True)
@@ -77,6 +79,9 @@ def create_plot_df(scores_df: dict[str : pd.DataFrame]) -> pd.DataFrame:
77
 
78
  # Concatenate all the created DataFrames
79
  concat_df = pd.concat(dfs, ignore_index=True)
 
 
 
80
 
81
  # Sort values by 'date'
82
  concat_df.sort_values(by="date", inplace=True)
 
16
  :param results_df: A DataFrame containing result information including metric scores and dates.
17
  :return: A new DataFrame containing the maximum scores until each date for every metric.
18
  """
19
+ print(raw_data[0])
20
+ print(raw_data[0].date)
21
  # Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it
22
  results_df = pd.DataFrame(raw_data)
23
+ print(results_df.columns)
24
  # results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True)
25
  results_df.sort_values(by="date", inplace=True)
26
 
 
37
  # We ignore models that are flagged/no longer on the hub/not finished
38
  to_ignore = (
39
  not row["still_on_hub"]
40
+ or not row["not_flagged"]
41
  or current_model in FLAGGED_MODELS
42
  or row["status"] != "FINISHED"
43
  )
 
71
  """
72
  # Initialize the list to store DataFrames
73
  dfs = []
 
74
  # Iterate over the cols and create a new DataFrame for each column
75
  for col in BENCHMARK_COLS + [AutoEvalColumn.average.name]:
76
  d = scores_df[col].reset_index(drop=True)
 
79
 
80
  # Concatenate all the created DataFrames
81
  concat_df = pd.concat(dfs, ignore_index=True)
82
+ # print("Columns in DataFrame:", concat_df.columns)
83
+ # if "date" not in concat_df.columns:
84
+ # raise ValueError("Date column missing from DataFrame. Cannot proceed with sorting.")
85
 
86
  # Sort values by 'date'
87
  concat_df.sort_values(by="date", inplace=True)