Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Interactive sliders
Browse files
app.py
CHANGED
@@ -18,7 +18,8 @@ from utils import (
|
|
18 |
format_data,
|
19 |
get_trendlines,
|
20 |
find_crossover_point,
|
21 |
-
sigmoid_transition
|
|
|
22 |
)
|
23 |
|
24 |
###################
|
@@ -105,21 +106,14 @@ merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
|
|
105 |
# get constants
|
106 |
min_elo_score, max_elo_score, _ = get_constants(merged_dfs)
|
107 |
date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
|
108 |
-
orgs = merged_dfs["Overall"].Organization.unique().tolist()
|
109 |
|
|
|
|
|
110 |
###################
|
111 |
### Build and Plot Data
|
112 |
###################
|
113 |
|
114 |
|
115 |
-
df = merged_dfs["Overall"]
|
116 |
-
top_orgs = df.groupby("Organization")["rating"].max().nlargest(11).index.tolist()
|
117 |
-
|
118 |
-
df = df.loc[(df["Organization"].isin(top_orgs)) & (df["rating"] > 1000)]
|
119 |
-
print(df)
|
120 |
-
|
121 |
-
df = df.loc[~df["Release Date"].isna()]
|
122 |
-
|
123 |
def get_data_split(dfs, set_name):
    """Return an independent, re-indexed copy of one entry of ``dfs``.

    Args:
        dfs: Mapping from split name to a DataFrame-like object (assumed to
            be pandas DataFrames -- confirm against the caller).
        set_name: Key of the split to extract.

    Returns:
        A deep copy of ``dfs[set_name]`` whose index has been reset to the
        default range index; the old index is dropped, not kept as a column.
    """
    # Deep copy so later edits to the result never touch the shared source.
    split = dfs[set_name].copy(deep=True)
    return split.reset_index(drop=True)
|
@@ -272,45 +266,32 @@ def make_figure(df):
|
|
272 |
speak_french = False
|
273 |
if speak_french:
|
274 |
fig.update_layout(
|
275 |
-
xaxis_title="Date",
|
276 |
title="La course au classement",
|
277 |
yaxis_title="Score ELO",
|
278 |
legend_title="Classement en Novembre 2024",
|
279 |
-
xaxis_range=[pd.Timestamp("2024-01-01"), current_date], # Extend x-axis for labels
|
280 |
-
yaxis_range=[1103, 1350],
|
281 |
)
|
282 |
else:
|
283 |
fig.update_layout(
|
284 |
-
xaxis_title="Date",
|
285 |
yaxis_title="ELO score on Chatbot Arena",
|
286 |
legend_title="Ranking as of November 2024",
|
287 |
title="The race for the best LLM",
|
288 |
-
hovermode="closest",
|
289 |
-
xaxis_range=[pd.Timestamp("2024-01-01"), current_date], # Extend x-axis for labels
|
290 |
-
yaxis_range=[1103, 1350],
|
291 |
)
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
|
294 |
fig.update_xaxes(
|
295 |
tickformat="%m-%Y",
|
296 |
)
|
297 |
-
print(fig)
|
298 |
return fig, df
|
299 |
|
300 |
-
def filter_df():
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
set_dark_mode = """
|
305 |
-
function refresh() {
|
306 |
-
const url = new URL(window.location);
|
307 |
-
|
308 |
-
if (url.searchParams.get('__theme') !== 'dark') {
|
309 |
-
url.searchParams.set('__theme', 'dark');
|
310 |
-
window.location.href = url.href;
|
311 |
-
}
|
312 |
-
}
|
313 |
-
"""
|
314 |
|
315 |
with gr.Blocks(
|
316 |
theme=gr.themes.Soft(
|
@@ -320,45 +301,49 @@ with gr.Blocks(
|
|
320 |
text_size=gr.themes.sizes.text_sm,
|
321 |
font=[
|
322 |
gr.themes.GoogleFont("Open Sans"),
|
323 |
-
"ui-
|
324 |
"system-ui",
|
325 |
-
"
|
326 |
],
|
327 |
),
|
328 |
-
js=set_dark_mode,
|
329 |
) as demo:
|
330 |
-
|
331 |
-
"""
|
332 |
-
<div style="text-align: center; max-width: 650px; margin: auto;">
|
333 |
-
<h1 style="font-weight: 900; margin-top: 5px;">π The race for the best LLM π</h1>
|
334 |
-
<p style="text-align: left; margin-top: 30px; margin-bottom: 30px; line-height: 20px;">
|
335 |
-
This app visualizes the progress of LLMs over time as scored by the <a href="https://leaderboard.lmsys.org/">LMSYS Chatbot Arena</a>.
|
336 |
-
The app is adapted from <a href="https://huggingface.co/spaces/andrewrreed/closed-vs-open-arena-elo"> this app</a> by Andrew Reed,
|
337 |
-
and is intended to stay up-to-date as new models are released and evaluated.
|
338 |
-
<div style="text-align: left;">
|
339 |
-
<strong>Plot info:</strong>
|
340 |
-
<br>
|
341 |
-
<ul style="padding-left: 20px;">
|
342 |
-
<li> The ELO score (y-axis) is a measure of the relative strength of a model based on its performance against other models in the arena. </li>
|
343 |
-
<li> The Release Date (x-axis) corresponds to when the model was first publicly released or when its ELO results were first reported (for ease of automated updates). </li>
|
344 |
-
<li> Trend lines are based on Ordinary Least Squares (OLS) regression and adjust based on the filter criteria. </li>
|
345 |
-
</ul>
|
346 |
-
</div>
|
347 |
-
</p>
|
348 |
-
</div>
|
349 |
-
"""
|
350 |
-
)
|
351 |
filtered_df = gr.State()
|
|
|
|
|
|
|
352 |
with gr.Group():
|
353 |
with gr.Tab("Plot"):
|
354 |
plot = gr.Plot(show_label=False)
|
355 |
with gr.Tab("Raw Data"):
|
356 |
display_df = gr.DataFrame()
|
357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
|
359 |
demo.load(
|
360 |
fn=filter_df,
|
361 |
-
inputs=[],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
outputs=filtered_df,
|
363 |
).then(
|
364 |
fn=make_figure,
|
@@ -366,4 +351,14 @@ with gr.Blocks(
|
|
366 |
outputs=[plot, display_df],
|
367 |
)
|
368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
demo.launch()
|
|
|
18 |
format_data,
|
19 |
get_trendlines,
|
20 |
find_crossover_point,
|
21 |
+
sigmoid_transition,
|
22 |
+
apply_template,
|
23 |
)
|
24 |
|
25 |
###################
|
|
|
106 |
# get constants
|
107 |
min_elo_score, max_elo_score, _ = get_constants(merged_dfs)
|
108 |
date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
|
|
|
109 |
|
110 |
+
ratings_df = merged_dfs["Overall"]
|
111 |
+
ratings_df = ratings_df.loc[~ratings_df["Release Date"].isna()]
|
112 |
###################
|
113 |
### Build and Plot Data
|
114 |
###################
|
115 |
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
def get_data_split(dfs, set_name):
    """Return an independent, re-indexed copy of one entry of ``dfs``.

    Args:
        dfs: Mapping from split name to a DataFrame-like object (assumed to
            be pandas DataFrames -- confirm against the caller).
        set_name: Key of the split to extract.

    Returns:
        A deep copy of ``dfs[set_name]`` whose index has been reset to the
        default range index; the old index is dropped, not kept as a column.
    """
    # Deep copy so later edits to the result never touch the shared source.
    split = dfs[set_name].copy(deep=True)
    return split.reset_index(drop=True)
|
|
|
266 |
speak_french = False
|
267 |
if speak_french:
|
268 |
fig.update_layout(
|
|
|
269 |
title="La course au classement",
|
270 |
yaxis_title="Score ELO",
|
271 |
legend_title="Classement en Novembre 2024",
|
|
|
|
|
272 |
)
|
273 |
else:
|
274 |
fig.update_layout(
|
|
|
275 |
yaxis_title="ELO score on Chatbot Arena",
|
276 |
legend_title="Ranking as of November 2024",
|
277 |
title="The race for the best LLM",
|
|
|
|
|
|
|
278 |
)
|
279 |
+
fig.update_layout(
|
280 |
+
xaxis_title="Date",
|
281 |
+
hovermode="closest",
|
282 |
+
xaxis_range=[pd.Timestamp("2024-01-01"), current_date], # Extend x-axis for labels
|
283 |
+
yaxis_range=[best_models_df["rating"].min() - 10, df["rating"].max() + 30],
|
284 |
+
)
|
285 |
+
apply_template(fig, annotation_text="Aymeric Roucher")
|
286 |
|
287 |
fig.update_xaxes(
|
288 |
tickformat="%m-%Y",
|
289 |
)
|
|
|
290 |
return fig, df
|
291 |
|
292 |
+
def filter_df(top_n_orgs=11, minimum_rating=1000):
    """Filter the module-level ``ratings_df`` to the leading organizations.

    Args:
        top_n_orgs: How many organizations to keep, ranked by the highest
            ``rating`` any of their rows reaches. Non-integer values are
            truncated towards zero.
        minimum_rating: Rows must have a ``rating`` strictly greater than
            this value to be kept.

    Returns:
        The rows of ``ratings_df`` that belong to one of the selected
        organizations and exceed ``minimum_rating``.
    """
    # This function is wired to a slider that declares no explicit step, so
    # the value may arrive as a float; nlargest needs an integer count.
    org_count = int(top_n_orgs)
    best_rating_per_org = ratings_df.groupby("Organization")["rating"].max()
    top_orgs = best_rating_per_org.nlargest(org_count).index.tolist()

    in_top_orgs = ratings_df["Organization"].isin(top_orgs)
    above_minimum = ratings_df["rating"] > minimum_rating
    return ratings_df.loc[in_top_orgs & above_minimum]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
with gr.Blocks(
|
297 |
theme=gr.themes.Soft(
|
|
|
301 |
text_size=gr.themes.sizes.text_sm,
|
302 |
font=[
|
303 |
gr.themes.GoogleFont("Open Sans"),
|
304 |
+
"ui-serif",
|
305 |
"system-ui",
|
306 |
+
"serif",
|
307 |
],
|
308 |
),
|
|
|
309 |
) as demo:
|
310 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
filtered_df = gr.State()
|
312 |
+
with gr.Row():
|
313 |
+
top_n_orgs = gr.Slider(minimum=1, maximum=30, value=10, label="View top N companies")
|
314 |
+
minimum_rating = gr.Slider(minimum=800, maximum=1300, value=1000, label="Restrict to ELO scores above N")
|
315 |
with gr.Group():
|
316 |
with gr.Tab("Plot"):
|
317 |
plot = gr.Plot(show_label=False)
|
318 |
with gr.Tab("Raw Data"):
|
319 |
display_df = gr.DataFrame()
|
320 |
|
321 |
+
gr.Markdown(
|
322 |
+
"""
|
323 |
+
This app visualizes the progress of LLMs over time as scored by the [LMSYS Chatbot Arena](https://leaderboard.lmsys.org/).
|
324 |
+
The app is adapted from [this app](https://huggingface.co/spaces/andrewrreed/closed-vs-open-arena-elo) by Andrew Reed,
|
325 |
+
and is intended to stay up-to-date as new models are released and evaluated.
|
326 |
+
|
327 |
+
> ### Plot info
|
328 |
+
> The ELO score (y-axis) is a measure of the relative strength of a model based on its performance against other models in the arena.
|
329 |
+
> The Release Date (x-axis) corresponds to when the model was first publicly released or when its ELO results were first reported (for ease of automated updates).
|
330 |
+
> Trend lines are based on Ordinary Least Squares (OLS) regression and adjust based on the filter criteria.
|
331 |
+
"""
|
332 |
+
)
|
333 |
|
334 |
demo.load(
|
335 |
fn=filter_df,
|
336 |
+
inputs=[top_n_orgs, minimum_rating],
|
337 |
+
outputs=filtered_df,
|
338 |
+
).then(
|
339 |
+
fn=make_figure,
|
340 |
+
inputs=[filtered_df],
|
341 |
+
outputs=[plot, display_df],
|
342 |
+
)
|
343 |
+
|
344 |
+
minimum_rating.change(
|
345 |
+
fn=filter_df,
|
346 |
+
inputs=[top_n_orgs, minimum_rating],
|
347 |
outputs=filtered_df,
|
348 |
).then(
|
349 |
fn=make_figure,
|
|
|
351 |
outputs=[plot, display_df],
|
352 |
)
|
353 |
|
354 |
+
top_n_orgs.change(
|
355 |
+
fn=filter_df,
|
356 |
+
inputs=[top_n_orgs, minimum_rating],
|
357 |
+
outputs=filtered_df,
|
358 |
+
).then(
|
359 |
+
fn=make_figure,
|
360 |
+
inputs=[filtered_df],
|
361 |
+
outputs=[plot, display_df],
|
362 |
+
)
|
363 |
+
|
364 |
demo.launch()
|
utils.py
CHANGED
@@ -233,3 +233,48 @@ def find_crossover_point(b1, m1, b2, m2):
|
|
233 |
# Function to create sigmoid transition
|
234 |
def sigmoid_transition(x, x0, k=0.1):
    """Return ``expit(k * (x - x0))``.

    Args:
        x: Position(s) at which to evaluate the transition.
        x0: Offset subtracted from ``x``; at ``x == x0`` the argument handed
            to ``expit`` is zero.
        k: Multiplier applied to the offset before calling ``expit``.

    Returns:
        Whatever ``expit`` returns for the scaled offset. ``expit`` must be
        in module scope (presumably ``scipy.special.expit`` -- confirm
        against the file's imports).
    """
    offset = x - x0
    return expit(k * offset)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
# Function to create sigmoid transition
|
234 |
def sigmoid_transition(x, x0, k=0.1):
    """Return ``expit(k * (x - x0))``.

    Args:
        x: Position(s) at which to evaluate the transition.
        x0: Offset subtracted from ``x``; at ``x == x0`` the argument handed
            to ``expit`` is zero.
        k: Multiplier applied to the offset before calling ``expit``.

    Returns:
        Whatever ``expit`` returns for the scaled offset. ``expit`` must be
        in module scope (presumably ``scipy.special.expit`` -- confirm
        against the file's imports).
    """
    offset = x - x0
    return expit(k * offset)
|
236 |
+
|
237 |
+
def apply_template(
    fig,
    template="none",
    annotation_text="",
    title=None,
    width=1200,
    height=600,
):
    """Apply a shared Garamond-styled layout to ``fig`` in place.

    Args:
        fig: Figure-like object exposing ``update_layout``, ``update_xaxes``
            and ``update_yaxes`` (presumably a plotly figure -- confirm
            against callers).
        template: Value stored under the layout's ``template`` key.
        annotation_text: Optional caption. When non-empty it is wrapped in
            ``<i>`` tags and placed at paper coordinates (1.05, -0.05).
            An empty string or ``None`` adds no annotation.
        title: Optional title; when ``None`` the ``title`` key is not sent,
            so any existing title is left alone.
        width: Value stored under the layout's ``width`` key.
        height: Value stored under the layout's ``height`` key.

    Returns:
        None. ``fig`` is modified in place.
    """
    layout_updates = {
        "template": template,
        "width": width,
        "height": height,
        "font": dict(family="Garamond", size=14),
        "title_font_family": "Garamond",
        "title_font_size": 24,
        "title_xanchor": "center",
        "legend": dict(
            itemsizing="constant",
            title_font_family="Garamond",
            font=dict(family="Garamond", size=14),
            itemwidth=30,
        ),
    }
    # Truthiness rather than len(): also treats None as "no caption".
    if annotation_text:
        # NOTE(review): if ``fig`` is a plotly figure that already carries
        # annotations, update_layout may merge this entry into the existing
        # ones instead of appending -- verify; fig.add_annotation would
        # append unconditionally.
        layout_updates["annotations"] = [
            dict(
                text=f"<i>{annotation_text}</i>",
                xref="paper",
                yref="paper",
                x=1.05,
                y=-0.05,
                xanchor="left",
                yanchor="top",
                showarrow=False,
                font=dict(size=14),
            )
        ]
    if title is not None:
        layout_updates["title"] = title
    fig.update_layout(layout_updates)
    fig.update_xaxes(title_font_family="Garamond", tickfont_family="Garamond")
    fig.update_yaxes(title_font_family="Garamond", tickfont_family="Garamond")
|