Alignment-Lab-AI committed on
Commit
8efb597
•
1 Parent(s): d741f98

Update app.py

Files changed (1)
  1. app.py +74 -9
app.py CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
 import datasets
 from huggingface_hub import snapshot_download
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import plotly.graph_objects as go
 
 from src.display.about import (
     CITATION_BUTTON_LABEL,
@@ -31,12 +32,11 @@ from src.envs import (
     HF_HOME,
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
+from src.tools.plots import create_plot_df, create_scores_df
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
-
 # Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
 # This controls whether a full initialization should be performed.
 DO_FULL_INIT = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
@@ -53,7 +53,6 @@ def time_diff_wrapper(func):
 
     return wrapper
 
-
 @time_diff_wrapper
 def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
     """Download dataset with exponential backoff retries."""
@@ -119,18 +118,85 @@ def init_space():
 
     return leaderboard_df, eval_queue_dfs
 
-
-# Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
-# This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
+# Initialize the space
 leaderboard_df, eval_queue_dfs = init_space()
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
 
-
 # Data processing for plots now only on demand in the respective Gradio tab
 def load_and_create_plots():
     plot_df = create_plot_df(create_scores_df(leaderboard_df))
     return plot_df
 
+def create_metric_plot_obj(df, metrics, title="Metrics Over Time"):
+    """Create plot with Open-Orca models highlighted in purple"""
+    fig = go.Figure()
+
+    # Add traces for each metric
+    for metric in metrics:
+        # Get the model names for this metric
+        model_names = df[f"{metric}_model"].tolist()
+
+        # Create masks for Open-Orca and non-Open-Orca models
+        is_open_orca = ["Open-Orca" in str(model) for model in model_names]
+
+        # Add trace for non-Open-Orca models
+        fig.add_trace(
+            go.Scatter(
+                x=df[df.index[~is_open_orca]],
+                y=df[metric][~is_open_orca],
+                name=metric,
+                mode='lines+markers',
+                line=dict(width=2),
+                marker=dict(size=8),
+                hovertemplate=(
+                    "Date: %{x}<br>"
+                    "Score: %{y:.2f}<br>"
+                    "Model: %{text}<br>"
+                ),
+                text=[model_names[i] for i, flag in enumerate(is_open_orca) if not flag]
+            )
+        )
+
+        # Add trace for Open-Orca models with purple color and larger markers
+        if any(is_open_orca):
+            fig.add_trace(
+                go.Scatter(
+                    x=df[df.index[is_open_orca]],
+                    y=df[metric][is_open_orca],
+                    name=f"{metric} (Open-Orca)",
+                    mode='lines+markers',
+                    line=dict(color='purple', width=3),
+                    marker=dict(
+                        color='purple',
+                        size=12,
+                        symbol='star'
+                    ),
+                    hovertemplate=(
+                        "Date: %{x}<br>"
+                        "Score: %{y:.2f}<br>"
+                        "Model: %{text}<br>"
+                    ),
+                    text=[model_names[i] for i, flag in enumerate(is_open_orca) if flag]
+                )
+            )
+
+    # Update layout
+    fig.update_layout(
+        title=title,
+        xaxis_title="Date",
+        yaxis_title="Score",
+        hovermode='x unified',
+        showlegend=True,
+        legend=dict(
+            yanchor="top",
+            y=0.99,
+            xanchor="left",
+            x=0.01
+        )
+    )
+
+    return fig
+
 def init_leaderboard(dataframe):
     return Leaderboard(
         value = dataframe,
@@ -210,6 +276,5 @@ with demo:
     )
 
     demo.load(fn=get_latest_data_leaderboard, inputs=[leaderboard], outputs=[leaderboard])
-
 
-    demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
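
Note on the new create_metric_plot_obj: as committed, is_open_orca is a plain Python list, so ~is_open_orca raises a TypeError, and df[df.index[mask]] selects by column label rather than by row. A minimal corrected sketch of the same idea, assuming df is a pandas DataFrame indexed by date with a score column named after each metric and a matching "<metric>_model" column (the .loc-based masking below is this sketch's assumption, not code from the commit):

import pandas as pd
import plotly.graph_objects as go

def create_metric_plot_obj(df: pd.DataFrame, metrics, title="Metrics Over Time") -> go.Figure:
    """Plot each metric over time and highlight Open-Orca models in purple."""
    fig = go.Figure()
    for metric in metrics:
        model_names = df[f"{metric}_model"].astype(str)
        # Boolean numpy mask so that ~mask is well defined
        # (inverting a plain Python list raises TypeError).
        is_open_orca = model_names.str.contains("Open-Orca").to_numpy()

        traces = [
            (~is_open_orca, metric,
             dict(line=dict(width=2), marker=dict(size=8))),
            (is_open_orca, f"{metric} (Open-Orca)",
             dict(line=dict(color="purple", width=3),
                  marker=dict(color="purple", size=12, symbol="star"))),
        ]
        for mask, label, style in traces:
            if not mask.any():
                continue
            sub = df.loc[mask]  # row selection, not column selection
            fig.add_trace(
                go.Scatter(
                    x=sub.index,  # assumes the frame is indexed by date
                    y=sub[metric],
                    name=label,
                    mode="lines+markers",
                    hovertemplate="Date: %{x}<br>Score: %{y:.2f}<br>Model: %{text}<br>",
                    text=model_names[mask],
                    **style,
                )
            )

    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title="Score",
        hovermode="x unified",
        showlegend=True,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )
    return fig

Presumably load_and_create_plots() hands its plot_df to this helper from the plots tab, taking over from the create_metric_plot_obj import that this commit drops from src.tools.plots.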
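
Only the signature of download_dataset appears in this diff; its body is untouched by the commit. For orientation, an exponential-backoff retry loop around snapshot_download consistent with max_attempts=3 and backoff_factor=1.5 could look roughly like the sketch below (an illustration, not the space's actual implementation; the time_diff_wrapper decorator is omitted):

import logging
import time
from huggingface_hub import snapshot_download

def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
    """Download a repo snapshot, retrying with exponential backoff."""
    for attempt in range(max_attempts):
        try:
            snapshot_download(repo_id=repo_id, local_dir=local_dir, repo_type=repo_type)
            return
        except Exception as err:  # broad catch for the sketch; narrow as needed
            if attempt == max_attempts - 1:
                raise
            wait = backoff_factor ** attempt
            logging.warning(
                "Download of %s failed (attempt %d/%d): %s; retrying in %.2fs",
                repo_id, attempt + 1, max_attempts, err, wait,
            )
            time.sleep(wait)

With backoff_factor=1.5, the waits between the three attempts would be 1.0 s and then 1.5 s before the final error is re-raised.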