Alina Lozovskaia committed on
Commit 2e74c81 • 1 Parent(s): 122c7af

bugfix and populate refactoring

Files changed (3):
  1. app.py +7 -9
  2. src/envs.py +3 -0
  3. src/populate.py +14 -17
app.py CHANGED
@@ -87,18 +87,19 @@ def init_space(full_init: bool = True):
     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
     download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
 
-    raw_data, leaderboard_df = get_leaderboard_df(
+    raw_data, original_df = get_leaderboard_df(
         results_path=EVAL_RESULTS_PATH,
         requests_path=EVAL_REQUESTS_PATH,
         dynamic_path=DYNAMIC_INFO_FILE_PATH,
         cols=COLS,
         benchmark_cols=BENCHMARK_COLS,
     )
-    update_collections(leaderboard_df)
-
+    update_collections(original_df)
+    leaderboard_df = original_df.copy()
+
     eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-    return leaderboard_df, raw_data, eval_queue_dfs
+    return leaderboard_df, raw_data, original_df, eval_queue_dfs
 
 
 # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
@@ -107,7 +108,7 @@ do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
 
 # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
 # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
-leaderboard_df, raw_data, eval_queue_dfs = init_space(full_init=do_full_init)
+leaderboard_df, raw_data, original_df, eval_queue_dfs = init_space(full_init=do_full_init)
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
 
 
@@ -335,8 +336,7 @@ with demo:
 
         # Dummy leaderboard for handling the case when the user uses backspace key
         hidden_leaderboard_table_for_search = gr.components.Dataframe(
-            # value=original_df[COLS],
-            value=leaderboard_df[COLS], # UPDATED
+            value=original_df[COLS],
             headers=COLS,
             datatype=TYPES,
             visible=False,
@@ -398,7 +398,6 @@ with demo:
     with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
         with gr.Row():
             with gr.Column():
-                # UPDATED
                 plot_df = load_and_create_plots()
                 chart = create_metric_plot_obj(
                     plot_df,
@@ -407,7 +406,6 @@ with demo:
                 )
                 gr.Plot(value=chart, min_width=500)
             with gr.Column():
-                # UPDATED
                 plot_df = load_and_create_plots()
                 chart = create_metric_plot_obj(
                     plot_df,
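
A note on the init_space change: keeping original_df separate from the copy handed to the UI matters because the visible table is filtered in place, while the hidden search table must keep the full data. A small self-contained pandas sketch of that distinction (toy column names, not the leaderboard schema):

import pandas as pd

# Toy frame standing in for the leaderboard; columns are illustrative only.
original_df = pd.DataFrame({"model": ["a", "b", "c"], "score": [1.0, 2.0, 3.0]})

# Without .copy(), both names would point at the same object, so filtering the
# display frame would also shrink the frame backing the hidden search table.
leaderboard_df = original_df.copy()
leaderboard_df = leaderboard_df[leaderboard_df["score"] > 1.0]

print(len(original_df))     # 3 - full data still available to restore the view
print(len(leaderboard_df))  # 2 - filtered view shown to the user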
src/envs.py CHANGED
@@ -16,6 +16,9 @@ PRIVATE_RESULTS_REPO = "open-llm-leaderboard/private-results"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
 
 CACHE_PATH = os.getenv("HF_HOME", ".")
+# Check if the CACHE_PATH is a directory and if we have write access, if not set to '.'
+if not os.path.isdir(CACHE_PATH) or not os.access(CACHE_PATH, os.W_OK):
+    CACHE_PATH = "."
 
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
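
The new guard keeps HF_HOME as the cache location only when it is an existing, writable directory; otherwise everything falls back to the current directory. A quick standalone sketch of the same check (the helper name and path are made up for illustration):

import os

def resolve_cache_path(preferred: str) -> str:
    # Same test as in src/envs.py: an existing directory we can write to,
    # otherwise fall back to ".".
    if os.path.isdir(preferred) and os.access(preferred, os.W_OK):
        return preferred
    return "."

print(resolve_cache_path("/definitely/not/a/real/dir"))  # "."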
src/populate.py CHANGED
@@ -1,5 +1,6 @@
 import json
 import os
+import pathlib
 import pandas as pd
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
@@ -26,25 +27,20 @@ def _process_model_data(entry, model_name_key="model", revision_key="revision"):
 
 def get_evaluation_queue_df(save_path, cols):
     """Generate dataframes for pending, running, and finished evaluation entries."""
+    save_path = pathlib.Path(save_path)
     all_evals = []
-    entries = os.listdir(save_path)
-    for entry in entries:
-        if entry.startswith(".") or entry.endswith(".md"):
+
+    for path in save_path.rglob('*'):
+        if path.is_dir():
+            continue
+        if path.name.startswith('.'):
+            continue
+        if path.name.endswith('.md'):
             continue
-        file_path = os.path.join(save_path, entry)
-        if os.path.isfile(file_path):  # Check if it's a file
-            data = _load_json_data(file_path)
-            if data:
-                all_evals.append(_process_model_data(data))
-        else:
-            # Optionally handle directory contents if needed
-            sub_entries = os.listdir(file_path)
-            for sub_entry in sub_entries:
-                sub_file_path = os.path.join(file_path, sub_entry)
-                if os.path.isfile(sub_file_path):
-                    data = _load_json_data(sub_file_path)
-                    if data:
-                        all_evals.append(_process_model_data(data))
+
+        data = _load_json_data(path)
+        if data:
+            all_evals.append(_process_model_data(data))
 
     # Organizing data by status
     status_map = {
@@ -72,3 +68,4 @@ def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmar
     df = df[cols].round(decimals=2)
     df = df[has_no_nan_values(df, benchmark_cols)]
     return raw_data, df
+
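
The pathlib rewrite replaces the hand-rolled two-level os.listdir walk with a single recursive rglob scan, and as a side effect it also picks up request files nested more than one directory deep. A small sketch of the same filtering against a throwaway tree (file and field names are made up):

import json
import pathlib
import tempfile

# Throwaway eval-queue layout: one top-level request, one nested one,
# plus files the scan is expected to skip (a README and a hidden file).
root = pathlib.Path(tempfile.mkdtemp())
(root / "org").mkdir()
(root / "model-a_request.json").write_text(json.dumps({"model": "org/model-a"}))
(root / "org" / "model-b_request.json").write_text(json.dumps({"model": "org/model-b"}))
(root / "README.md").write_text("ignored")
(root / ".hidden").write_text("ignored")

found = []
for path in root.rglob("*"):  # recursive, any depth
    if path.is_dir():
        continue
    if path.name.startswith(".") or path.name.endswith(".md"):
        continue
    found.append(json.loads(path.read_text())["model"])

print(sorted(found))  # ['org/model-a', 'org/model-b']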