m-ric HF staff committed on
Commit
31aa24a
•
1 Parent(s): 79f07b1

Interactive sliders

Browse files
Files changed (2) hide show
  1. app.py +53 -58
  2. utils.py +45 -0
app.py CHANGED
@@ -18,7 +18,8 @@ from utils import (
18
  format_data,
19
  get_trendlines,
20
  find_crossover_point,
21
- sigmoid_transition
 
22
  )
23
 
24
  ###################
@@ -105,21 +106,14 @@ merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
105
  # get constants
106
  min_elo_score, max_elo_score, _ = get_constants(merged_dfs)
107
  date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
108
- orgs = merged_dfs["Overall"].Organization.unique().tolist()
109
 
 
 
110
  ###################
111
  ### Build and Plot Data
112
  ###################
113
 
114
 
115
- df = merged_dfs["Overall"]
116
- top_orgs = df.groupby("Organization")["rating"].max().nlargest(11).index.tolist()
117
-
118
- df = df.loc[(df["Organization"].isin(top_orgs)) & (df["rating"] > 1000)]
119
- print(df)
120
-
121
- df = df.loc[~df["Release Date"].isna()]
122
-
123
  def get_data_split(dfs, set_name):
124
  df = dfs[set_name].copy(deep=True)
125
  return df.reset_index(drop=True)
@@ -272,45 +266,32 @@ def make_figure(df):
272
  speak_french = False
273
  if speak_french:
274
  fig.update_layout(
275
- xaxis_title="Date",
276
  title="La course au classement",
277
  yaxis_title="Score ELO",
278
  legend_title="Classement en Novembre 2024",
279
- xaxis_range=[pd.Timestamp("2024-01-01"), current_date], # Extend x-axis for labels
280
- yaxis_range=[1103, 1350],
281
  )
282
  else:
283
  fig.update_layout(
284
- xaxis_title="Date",
285
  yaxis_title="ELO score on Chatbot Arena",
286
  legend_title="Ranking as of November 2024",
287
  title="The race for the best LLM",
288
- hovermode="closest",
289
- xaxis_range=[pd.Timestamp("2024-01-01"), current_date], # Extend x-axis for labels
290
- yaxis_range=[1103, 1350],
291
  )
292
- # apply_template(fig)
 
 
 
 
 
 
293
 
294
  fig.update_xaxes(
295
  tickformat="%m-%Y",
296
  )
297
- print(fig)
298
  return fig, df
299
 
300
- def filter_df():
301
- return df
302
-
303
-
304
- set_dark_mode = """
305
- function refresh() {
306
- const url = new URL(window.location);
307
-
308
- if (url.searchParams.get('__theme') !== 'dark') {
309
- url.searchParams.set('__theme', 'dark');
310
- window.location.href = url.href;
311
- }
312
- }
313
- """
314
 
315
  with gr.Blocks(
316
  theme=gr.themes.Soft(
@@ -320,45 +301,49 @@ with gr.Blocks(
320
  text_size=gr.themes.sizes.text_sm,
321
  font=[
322
  gr.themes.GoogleFont("Open Sans"),
323
- "ui-sans-serif",
324
  "system-ui",
325
- "sans-serif",
326
  ],
327
  ),
328
- js=set_dark_mode,
329
  ) as demo:
330
- gr.Markdown(
331
- """
332
- <div style="text-align: center; max-width: 650px; margin: auto;">
333
- <h1 style="font-weight: 900; margin-top: 5px;">🚀 The race for the best LLM 🚀</h1>
334
- <p style="text-align: left; margin-top: 30px; margin-bottom: 30px; line-height: 20px;">
335
- This app visualizes the progress of LLMs over time as scored by the <a href="https://leaderboard.lmsys.org/">LMSYS Chatbot Arena</a>.
336
- The app is adapted from <a href="https://huggingface.co/spaces/andrewrreed/closed-vs-open-arena-elo"> this app</a> by Andew Reed,
337
- and is intended to stay up-to-date as new models are released and evaluated.
338
- <div style="text-align: left;">
339
- <strong>Plot info:</strong>
340
- <br>
341
- <ul style="padding-left: 20px;">
342
- <li> The ELO score (y-axis) is a measure of the relative strength of a model based on its performance against other models in the arena. </li>
343
- <li> The Release Date (x-axis) corresponds to when the model was first publicly released or when its ELO results were first reported (for ease of automated updates). </li>
344
- <li> Trend lines are based on Ordinary Least Squares (OLS) regression and adjust based on the filter criteria. </li>
345
- <ul>
346
- </div>
347
- </p>
348
- </div>
349
- """
350
- )
351
  filtered_df = gr.State()
 
 
 
352
  with gr.Group():
353
  with gr.Tab("Plot"):
354
  plot = gr.Plot(show_label=False)
355
  with gr.Tab("Raw Data"):
356
  display_df = gr.DataFrame()
357
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
  demo.load(
360
  fn=filter_df,
361
- inputs=[],
 
 
 
 
 
 
 
 
 
 
362
  outputs=filtered_df,
363
  ).then(
364
  fn=make_figure,
@@ -366,4 +351,14 @@ with gr.Blocks(
366
  outputs=[plot, display_df],
367
  )
368
 
 
 
 
 
 
 
 
 
 
 
369
  demo.launch()
 
18
  format_data,
19
  get_trendlines,
20
  find_crossover_point,
21
+ sigmoid_transition,
22
+ apply_template,
23
  )
24
 
25
  ###################
 
106
  # get constants
107
  min_elo_score, max_elo_score, _ = get_constants(merged_dfs)
108
  date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
 
109
 
110
+ ratings_df = merged_dfs["Overall"]
111
+ ratings_df = ratings_df.loc[~ratings_df["Release Date"].isna()]
112
  ###################
113
  ### Build and Plot Data
114
  ###################
115
 
116
 
 
 
 
 
 
 
 
 
117
  def get_data_split(dfs, set_name):
118
  df = dfs[set_name].copy(deep=True)
119
  return df.reset_index(drop=True)
 
266
  speak_french = False
267
  if speak_french:
268
  fig.update_layout(
 
269
  title="La course au classement",
270
  yaxis_title="Score ELO",
271
  legend_title="Classement en Novembre 2024",
 
 
272
  )
273
  else:
274
  fig.update_layout(
 
275
  yaxis_title="ELO score on Chatbot Arena",
276
  legend_title="Ranking as of November 2024",
277
  title="The race for the best LLM",
 
 
 
278
  )
279
+ fig.update_layout(
280
+ xaxis_title="Date",
281
+ hovermode="closest",
282
+ xaxis_range=[pd.Timestamp("2024-01-01"), current_date], # Extend x-axis for labels
283
+ yaxis_range=[best_models_df["rating"].min() - 10, df["rating"].max() + 30],
284
+ )
285
+ apply_template(fig, annotation_text="Aymeric Roucher")
286
 
287
  fig.update_xaxes(
288
  tickformat="%m-%Y",
289
  )
 
290
  return fig, df
291
 
292
+ def filter_df(top_n_orgs=11, minimum_rating=1000):
293
+ top_orgs = ratings_df.groupby("Organization")["rating"].max().nlargest(top_n_orgs).index.tolist()
294
+ return ratings_df.loc[(ratings_df["Organization"].isin(top_orgs)) & (ratings_df["rating"] > minimum_rating)]
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  with gr.Blocks(
297
  theme=gr.themes.Soft(
 
301
  text_size=gr.themes.sizes.text_sm,
302
  font=[
303
  gr.themes.GoogleFont("Open Sans"),
304
+ "ui-serif",
305
  "system-ui",
306
+ "serif",
307
  ],
308
  ),
 
309
  ) as demo:
310
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  filtered_df = gr.State()
312
+ with gr.Row():
313
+ top_n_orgs = gr.Slider(minimum=1, maximum=30, value=10, label="View top N companies")
314
+ minimum_rating = gr.Slider(minimum=800, maximum=1300, value=1000, label="Restrict to ELO scores above N")
315
  with gr.Group():
316
  with gr.Tab("Plot"):
317
  plot = gr.Plot(show_label=False)
318
  with gr.Tab("Raw Data"):
319
  display_df = gr.DataFrame()
320
 
321
+ gr.Markdown(
322
+ """
323
+ This app visualizes the progress of LLMs over time as scored by the [LMSYS Chatbot Arena](https://leaderboard.lmsys.org/).
324
+ The app is adapted from [this app](https://huggingface.co/spaces/andrewrreed/closed-vs-open-arena-elo) by Andew Reed,
325
+ and is intended to stay up-to-date as new models are released and evaluated.
326
+
327
+ > ### Plot info
328
+ > The ELO score (y-axis) is a measure of the relative strength of a model based on its performance against other models in the arena.
329
+ > The Release Date (x-axis) corresponds to when the model was first publicly released or when its ELO results were first reported (for ease of automated updates).
330
+ > Trend lines are based on Ordinary Least Squares (OLS) regression and adjust based on the filter criteria.
331
+ """
332
+ )
333
 
334
  demo.load(
335
  fn=filter_df,
336
+ inputs=[top_n_orgs, minimum_rating],
337
+ outputs=filtered_df,
338
+ ).then(
339
+ fn=make_figure,
340
+ inputs=[filtered_df],
341
+ outputs=[plot, display_df],
342
+ )
343
+
344
+ minimum_rating.change(
345
+ fn=filter_df,
346
+ inputs=[top_n_orgs, minimum_rating],
347
  outputs=filtered_df,
348
  ).then(
349
  fn=make_figure,
 
351
  outputs=[plot, display_df],
352
  )
353
 
354
+ top_n_orgs.change(
355
+ fn=filter_df,
356
+ inputs=[top_n_orgs, minimum_rating],
357
+ outputs=filtered_df,
358
+ ).then(
359
+ fn=make_figure,
360
+ inputs=[filtered_df],
361
+ outputs=[plot, display_df],
362
+ )
363
+
364
  demo.launch()
utils.py CHANGED
@@ -233,3 +233,48 @@ def find_crossover_point(b1, m1, b2, m2):
233
  # Function to create sigmoid transition
234
  def sigmoid_transition(x, x0, k=0.1):
235
  return expit(k * (x - x0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  # Function to create sigmoid transition
234
  def sigmoid_transition(x, x0, k=0.1):
235
  return expit(k * (x - x0))
236
+
237
+ def apply_template(
238
+ fig,
239
+ template="none",
240
+ annotation_text="",
241
+ title=None,
242
+ width=1200,
243
+ height=600,
244
+ ):
245
+ """Applies template in-place to input fig."""
246
+ layout_updates = {
247
+ "template": template,
248
+ "width": width,
249
+ "height": height,
250
+ "font": dict(family="Garamond", size=14),
251
+ "title_font_family": "Garamond",
252
+ "title_font_size": 24,
253
+ "title_xanchor": "center",
254
+ "legend": dict(
255
+ itemsizing="constant",
256
+ title_font_family="Garamond",
257
+ font=dict(family="Garamond", size=14),
258
+ itemwidth=30,
259
+ ),
260
+ }
261
+ if len(annotation_text) > 0:
262
+ layout_updates["annotations"] = [
263
+ dict(
264
+ text=f"<i>{annotation_text}</i>",
265
+ xref="paper",
266
+ yref="paper",
267
+ x=1.05,
268
+ y=-0.05,
269
+ xanchor="left",
270
+ yanchor="top",
271
+ showarrow=False,
272
+ font=dict(size=14),
273
+ )
274
+ ]
275
+ if title is not None:
276
+ layout_updates["title"] = title
277
+ fig.update_layout(layout_updates)
278
+ fig.update_xaxes(title_font_family="Garamond", tickfont_family="Garamond")
279
+ fig.update_yaxes(title_font_family="Garamond", tickfont_family="Garamond")
280
+ return