Spaces:
Running
Running
Commit
·
fb1f20c
1
Parent(s):
486ddb5
update UI
Browse files- app.py +262 -156
- data/2024-10/7b.xlsx +0 -0
- requirements.txt +1 -1
app.py
CHANGED
@@ -13,6 +13,7 @@ from huggingface_hub import HfApi
|
|
13 |
from huggingface_hub.hf_api import HTTPError
|
14 |
from huggingface_hub.utils._errors import GatedRepoError
|
15 |
from gradio_rangeslider import RangeSlider
|
|
|
16 |
|
17 |
|
18 |
load_dotenv()
|
@@ -139,7 +140,7 @@ def get_unique_column_names(all_data):
|
|
139 |
"arxiv_\u200bphysics",
|
140 |
"github_\u200bcpp",
|
141 |
"github_\u200bpython",
|
142 |
-
"ao3_\u200bchinese",
|
143 |
]
|
144 |
|
145 |
|
@@ -149,74 +150,98 @@ def color_cell(value):
|
|
149 |
|
150 |
def update_table(
|
151 |
period: str,
|
152 |
-
|
153 |
metric: str,
|
154 |
visible_columns: list,
|
155 |
color_columns: list,
|
156 |
size_range: list,
|
157 |
-
sort_by: str = "Average (
|
158 |
ascending: bool = True,
|
159 |
):
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
|
222 |
def create_world_languages_gdp_chart():
|
@@ -292,95 +317,172 @@ def submit_model(name):
|
|
292 |
return "ERROR: Unexpected error. Please try again later."
|
293 |
|
294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
def create_scaling_plot(all_data, period):
|
296 |
selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
297 |
target_data = all_data[period]
|
298 |
new_df = pd.DataFrame()
|
299 |
|
300 |
for size in target_data.keys():
|
301 |
-
new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns]], axis=0)
|
302 |
-
|
303 |
-
new_df
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
)
|
316 |
-
|
317 |
-
fig.
|
318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
)
|
320 |
|
321 |
-
|
322 |
-
"
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
"
|
328 |
-
|
329 |
-
|
330 |
-
"
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
y = connection_points["Log Compression Rate (%)"].values
|
346 |
-
model = LinearRegression().fit(X, y)
|
347 |
-
|
348 |
-
x_min = connection_points["Log Params(B)"].min()
|
349 |
-
x_max = connection_points["Log Params(B)"].max()
|
350 |
-
extended_x = np.linspace(x_min, x_max * 1.5, 100)
|
351 |
-
extended_x_original = np.exp(extended_x)
|
352 |
-
trend_line_y = model.predict(extended_x.reshape(-1, 1))
|
353 |
-
trend_line_y_original = np.exp(trend_line_y)
|
354 |
-
|
355 |
-
trend_line = go.Scatter(
|
356 |
-
x=extended_x,
|
357 |
-
y=trend_line_y,
|
358 |
-
mode="lines",
|
359 |
-
line=dict(color="skyblue", dash="dash"),
|
360 |
-
name="Trend Line",
|
361 |
-
hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
|
362 |
-
customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
|
363 |
)
|
364 |
|
365 |
-
fig.add_trace(trend_line)
|
366 |
-
|
367 |
-
x_min = new_df["Params(B)"].min()
|
368 |
-
x_max = new_df["Params(B)"].max()
|
369 |
-
x_tick_vals = np.geomspace(x_min, x_max, num=5)
|
370 |
-
x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
|
371 |
-
|
372 |
-
y_min = new_df["Compression Rate (%)"].min()
|
373 |
-
y_max = new_df["Compression Rate (%)"].max()
|
374 |
-
y_tick_vals = np.geomspace(y_min, y_max, num=5)
|
375 |
-
y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
376 |
-
|
377 |
-
fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
|
378 |
-
fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
|
379 |
-
|
380 |
-
fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
|
381 |
-
|
382 |
-
fig.update_traces(marker=dict(size=12))
|
383 |
-
|
384 |
return fig
|
385 |
|
386 |
|
@@ -423,19 +525,17 @@ time_list.sort()
|
|
423 |
last_period = time_list[-1]
|
424 |
|
425 |
initial_fig = create_scaling_plot(all_data, last_period)
|
426 |
-
initial_period = last_period
|
427 |
-
initial_models = model_size_list
|
428 |
initial_metric = metric_list[0]
|
429 |
initial_columns = get_unique_column_names(all_data)
|
430 |
-
initial_columns = initial_columns[:-1]
|
431 |
-
# initial_colors = ["Average"]
|
432 |
initial_colors = ["Average", "Individual Tests"]
|
433 |
initial_size_range = [0, 15]
|
434 |
-
initial_data = update_table(
|
435 |
|
436 |
css = """
|
437 |
.gradio-container {
|
438 |
max-width: 95% !important;
|
|
|
439 |
}
|
440 |
.tab-buttons button {
|
441 |
font-size: 1.3em;
|
@@ -444,7 +544,11 @@ css = """
|
|
444 |
white-space: normal;
|
445 |
word-break: break-word;
|
446 |
}
|
447 |
-
|
|
|
|
|
|
|
|
|
448 |
"""
|
449 |
|
450 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
@@ -460,17 +564,18 @@ with gr.Blocks(css=css) as demo:
|
|
460 |
period_selector = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
461 |
model_selector = gr.CheckboxGroup(label="Model Size", choices=model_size_list, value=model_size_list)
|
462 |
size_range_slider = RangeSlider(minimum=0, maximum=15, value=[0, 15], step=0.1, label="Model Size Range")
|
463 |
-
metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=
|
464 |
with gr.Column():
|
465 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
466 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
467 |
|
468 |
-
table = gr.Dataframe(
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
)
|
|
|
474 |
|
475 |
period_selector.change(
|
476 |
update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
|
@@ -494,6 +599,7 @@ with gr.Blocks(css=css) as demo:
|
|
494 |
with gr.Tab("🌍 MultiLang"):
|
495 |
gr.Markdown("## Coming soon...")
|
496 |
world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
|
|
497 |
with gr.Tab("📈 Scaling Law"):
|
498 |
print(time_list)
|
499 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
@@ -507,13 +613,13 @@ with gr.Blocks(css=css) as demo:
|
|
507 |
|
508 |
with gr.Tab("ℹ️ About"):
|
509 |
gr.Markdown(about_md)
|
|
|
510 |
with gr.Tab("🚀 Submit"):
|
511 |
with gr.Group():
|
512 |
with gr.Row():
|
513 |
model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
|
514 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
515 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
516 |
-
|
517 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
518 |
|
519 |
demo.launch(share=False)
|
|
|
13 |
from huggingface_hub.hf_api import HTTPError
|
14 |
from huggingface_hub.utils._errors import GatedRepoError
|
15 |
from gradio_rangeslider import RangeSlider
|
16 |
+
import datetime
|
17 |
|
18 |
|
19 |
load_dotenv()
|
|
|
140 |
"arxiv_\u200bphysics",
|
141 |
"github_\u200bcpp",
|
142 |
"github_\u200bpython",
|
143 |
+
# "ao3_\u200bchinese",
|
144 |
]
|
145 |
|
146 |
|
|
|
150 |
|
151 |
def update_table(
    period: str,
    models_size: list,
    metric: str,
    visible_columns: list,
    color_columns: list,
    size_range: list,
    sort_by: str = "Average (lower=better)",
    ascending: bool = True,
) -> str:
    """Build the leaderboard table for the current filter selection as HTML.

    Args:
        period: Key into the module-level ``all_data`` mapping.
        models_size: Selected model-size labels; each is translated to a file
            name through ``model_size_to_file_name``.
        metric: Selected metric label; translated to a sheet name through
            ``metric_to_sheet``.
        visible_columns: Data-source columns to display and to average over.
            Entries that are not present in the loaded data, that repeat, or
            that name one of the fixed columns are ignored.
        color_columns: Any of ``"Average"`` / ``"Individual Tests"``; selects
            which columns receive the green-white-red background gradient.
        size_range: ``[low, high]`` inclusive bounds applied to the
            ``"Parameters Count (B)"`` column.
        sort_by: Column (post-rename name) used to order the rows.
        ascending: Sort direction.

    Returns:
        The styled table rendered with ``Styler.to_html()``, or a plain
        message string when the selection yields no rows.
    """
    no_data_message = "No data available for the selected models and period."

    print(
        f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}\n"
    )

    if not models_size:
        return no_data_message

    target_period_data = all_data[period]
    sheet_name = metric_to_sheet[metric]

    # Columns that are entirely NaN in a sheet are dropped before concatenating.
    frames = [
        target_period_data[model_size_to_file_name[model]][sheet_name].dropna(axis=1, how="all")
        for model in models_size
    ]
    combined_data = pd.concat(frames, axis=0)
    if combined_data.empty:
        return no_data_message

    # Filter models based on the size range. reset_index() returns a new
    # frame, so the column assignments below never write into a slice.
    in_range = combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])
    combined_data = combined_data[in_range].reset_index(drop=True)
    if combined_data.empty:
        return no_data_message

    # Vectorised strip of the checkpoint suffix; non-string names become NaN
    # instead of raising AttributeError.
    combined_data["Name"] = combined_data["Name"].str.replace(".pth", "", regex=False)

    # Keep only the requested data columns that actually exist (a column that
    # was all-NaN in every sheet has been dropped above), skipping the fixed
    # columns and duplicates so the later column selection cannot raise
    # KeyError or emit the same column twice.
    fixed_names = {
        "Name",
        "Parameters Count (B)",
        "Average (The lower the better)",
        "Params (B)",
        "Average (lower=better)",
    }
    relevant_columns = []
    for col in visible_columns:
        if col in fixed_names or col in relevant_columns:
            continue
        if col not in combined_data.columns:
            continue
        relevant_columns.append(col)

    # The average is recomputed over the currently visible data columns only.
    combined_data["Average (The lower the better)"] = combined_data[relevant_columns].mean(axis=1).round(3)
    combined_data = combined_data.rename(
        columns={
            "Parameters Count (B)": "Params (B)",
            "Average (The lower the better)": "Average (lower=better)",
        }
    )

    sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
    display_columns = ["Name", "Params (B)", "Average (lower=better)"] + relevant_columns
    filtered_data = sorted_data[display_columns].copy()
    filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]

    formatter = {col: "{:.3f}" for col in filtered_data.columns if pd.api.types.is_float_dtype(filtered_data[col])}

    # Colour gradient: green (low) -> white -> red (high).
    colors = ["#63be7b", "#ffffff", "#f8696b"]
    cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)

    target_color_columns = []
    if "Average" in color_columns:
        target_color_columns.append("Average (lower=better)")
    if "Individual Tests" in color_columns:
        target_color_columns.extend(
            [col for col in filtered_data.columns if col not in ["Name", "Params (B)", "Average (lower=better)"]]
        )

    styler = filtered_data.style.format(formatter).map(color_cell, subset=["Params (B)"])

    # Explicit bounds are only meaningful with more than one row. With a
    # single row fall back to None (Styler derives the bounds itself) instead
    # of looking up a bound that was never computed.
    has_multiple_rows = len(filtered_data) > 1
    for column in target_color_columns:
        col_values = filtered_data[column]
        lower = col_values.min() if has_multiple_rows else None
        upper = col_values.max() if has_multiple_rows else None
        styler = styler.background_gradient(cmap=cmap, subset=[column], vmin=lower, vmax=upper)

    styler = styler.hide(axis="index")

    # Fixed pixel widths: wide name column, medium params/average columns and
    # a uniform width for every remaining data column, however many there are.
    leading_widths = [300, 150, 150]
    default_width = 100
    table_styles = []
    for i in range(len(filtered_data.columns)):
        w = leading_widths[i] if i < len(leading_widths) else default_width
        props = [("min-width", f"{w}px"), ("max-width", f"{w}px"), ("text-align", "center")]
        # Centre the header text.
        table_styles.append({"selector": f"th.col{i}", "props": list(props)})
        # Centre the cell text.
        table_styles.append({"selector": f"td.col{i}", "props": list(props)})

    styler = styler.set_table_styles(table_styles)

    html_output = styler.to_html()
    return html_output
|
245 |
|
246 |
|
247 |
def create_world_languages_gdp_chart():
|
|
|
317 |
return "ERROR: Unexpected error. Please try again later."
|
318 |
|
319 |
|
320 |
+
# def create_scaling_plot(all_data, period):
|
321 |
+
# selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
322 |
+
# target_data = all_data[period]
|
323 |
+
# new_df = pd.DataFrame()
|
324 |
+
|
325 |
+
# for size in target_data.keys():
|
326 |
+
# new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
327 |
+
|
328 |
+
# new_df.rename(columns={"Parameters Count (B)": "Params(B)", "Average (The lower the better)": "Compression Rate (%)"}, inplace=True)
|
329 |
+
|
330 |
+
# new_df["Log Params(B)"] = np.log(new_df["Params(B)"])
|
331 |
+
# new_df["Log Compression Rate (%)"] = np.log(new_df["Compression Rate (%)"])
|
332 |
+
|
333 |
+
# fig = px.scatter(
|
334 |
+
# new_df,
|
335 |
+
# x="Log Params(B)",
|
336 |
+
# y="Log Compression Rate (%)",
|
337 |
+
# title="Compression Rate Scaling Law",
|
338 |
+
# hover_name="Name",
|
339 |
+
# custom_data=["Params(B)", "Compression Rate (%)"],
|
340 |
+
# )
|
341 |
+
|
342 |
+
# fig.update_traces(
|
343 |
+
# hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
|
344 |
+
# )
|
345 |
+
# fig.update_layout(
|
346 |
+
# width=800, # 设置图像宽度
|
347 |
+
# height=600, # 设置图像高度
|
348 |
+
# title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
349 |
+
# showlegend=True,
|
350 |
+
# xaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Params(B)"}, # 确保坐标轴类型正确
|
351 |
+
# yaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Compression Rate (%)", "autorange": "reversed"},
|
352 |
+
# )
|
353 |
+
|
354 |
+
# names_to_connect_dict = {
|
355 |
+
# "2024-05": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
356 |
+
# "2024-06": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
357 |
+
# "2024-07": ["Meta-Llama-3.1-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
358 |
+
# "2024-08": [
|
359 |
+
# "Meta-Llama-3.1-8B",
|
360 |
+
# "Rene-v0.1-1.3b-pytorch",
|
361 |
+
# "stablelm-3b-4e1t",
|
362 |
+
# "Qwen2-1.5B",
|
363 |
+
# "TinyLlama-1.1B-intermediate-step-1431k-3T",
|
364 |
+
# "Mistral-Nemo-Base-2407",
|
365 |
+
# ],
|
366 |
+
# "2025-01": ["Qwen2.5-1.5B"],
|
367 |
+
# }
|
368 |
+
|
369 |
+
# names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict["2024-08"])
|
370 |
+
|
371 |
+
# connection_points = new_df[new_df["Name"].isin(names_to_connect)]
|
372 |
+
# print(connection_points)
|
373 |
+
|
374 |
+
# new_df["Color"] = new_df["Name"].apply(lambda name: "#39C5BB" if name in names_to_connect else "#636efa")
|
375 |
+
|
376 |
+
# fig.update_traces(marker=dict(color=new_df["Color"]))
|
377 |
+
|
378 |
+
# X = connection_points["Log Params(B)"].values.reshape(-1, 1)
|
379 |
+
# y = connection_points["Log Compression Rate (%)"].values
|
380 |
+
# model = LinearRegression().fit(X, y)
|
381 |
+
|
382 |
+
# x_min = connection_points["Log Params(B)"].min()
|
383 |
+
# x_max = connection_points["Log Params(B)"].max()
|
384 |
+
# extended_x = np.linspace(x_min, x_max * 1.5, 100)
|
385 |
+
# extended_x_original = np.exp(extended_x)
|
386 |
+
# trend_line_y = model.predict(extended_x.reshape(-1, 1))
|
387 |
+
# trend_line_y_original = np.exp(trend_line_y)
|
388 |
+
|
389 |
+
# trend_line = go.Scatter(
|
390 |
+
# x=extended_x,
|
391 |
+
# y=trend_line_y,
|
392 |
+
# mode="lines",
|
393 |
+
# line=dict(color="skyblue", dash="dash"),
|
394 |
+
# name="Trend Line",
|
395 |
+
# hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
|
396 |
+
# customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
|
397 |
+
# )
|
398 |
+
|
399 |
+
# fig.add_trace(trend_line)
|
400 |
+
|
401 |
+
# x_min = new_df["Params(B)"].min()
|
402 |
+
# x_max = new_df["Params(B)"].max()
|
403 |
+
# x_tick_vals = np.geomspace(x_min, x_max, num=5)
|
404 |
+
# x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
|
405 |
+
|
406 |
+
# y_min = new_df["Compression Rate (%)"].min()
|
407 |
+
# y_max = new_df["Compression Rate (%)"].max()
|
408 |
+
# y_tick_vals = np.geomspace(y_min, y_max, num=5)
|
409 |
+
# y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
410 |
+
|
411 |
+
# fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
|
412 |
+
# fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
|
413 |
+
|
414 |
+
# fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
|
415 |
+
|
416 |
+
# fig.update_traces(marker=dict(size=12))
|
417 |
+
|
418 |
+
# print(fig.layout)
|
419 |
+
|
420 |
+
# return fig
|
421 |
+
|
422 |
+
|
423 |
def create_scaling_plot(all_data, period):
    """Build the log-log "Compression Rate Scaling Law" scatter plot.

    Args:
        all_data: Nested mapping indexed as ``all_data[period][size]["cr"]``,
            where the innermost value is a DataFrame containing the columns
            ``"Name"``, ``"Parameters Count (B)"`` and
            ``"Average (The lower the better)"``.
        period: Key of the period to plot.

    Returns:
        A ``plotly.graph_objects.Figure`` with one marker per model, both axes
        logarithmic and the y axis reversed. Rows whose parameter count or
        average is missing or non-positive cannot be placed on a log axis and
        are left out; if nothing remains, an empty figure with the same
        layout is returned.
    """
    selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
    name_col, params_col, rate_col = selected_columns
    target_data = all_data[period]

    # Concatenate once instead of growing a DataFrame inside the loop.
    frames = [target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all") for size in target_data]
    if frames:
        # reindex restores any column that was all-NaN in every frame.
        new_df = pd.concat(frames, axis=0, ignore_index=True).reindex(columns=selected_columns)
    else:
        new_df = pd.DataFrame(columns=selected_columns)

    new_df = new_df.astype({params_col: float, rate_col: float})
    # NaN compares False, so this also drops rows with missing values.
    plottable = new_df[(new_df[params_col] > 0) & (new_df[rate_col] > 0)]

    x_values = plottable[params_col].tolist()
    y_values = plottable[rate_col].tolist()
    names = plottable[name_col].tolist()

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=y_values,
            mode="markers",
            name="Models",
            marker=dict(size=12, color="#39C5BB", opacity=0.8),
            text=names,
            customdata=list(zip(x_values, y_values)),
            hovertemplate=(
                "<b>%{text}</b><br>" + "Params: %{customdata[0]:.2f}B<br>" + "Compression Rate: %{customdata[1]:.2f}%<br>" + "<extra></extra>"
            ),
        )
    )

    xaxis = dict(
        title="Parameters (B)",
        showgrid=True,
        zeroline=False,
        type="log",
        tickformat=".2f",  # two decimal places
    )
    yaxis = dict(
        title="Compression Rate (%)",
        showgrid=True,
        zeroline=False,
        type="log",
        tickformat=".2f",  # two decimal places
    )

    if x_values:
        # Axis ranges on a log axis are expressed in log10 units.
        x_min, x_max = float(np.log10(min(x_values))), float(np.log10(max(x_values)))
        y_min, y_max = float(np.log10(min(y_values))), float(np.log10(max(y_values)))

        xaxis["range"] = [x_min - 0.1, x_max + 0.1]
        # An explicit range is ignored while autorange is active, so the axis
        # is reversed by listing the bounds from high to low instead.
        yaxis["range"] = [y_max + 0.1, y_min - 0.1]

        # Tick spacing that splits the span into 5 ticks; a zero span (single
        # model, or identical values) keeps Plotly's automatic ticks.
        if x_max > x_min:
            xaxis["dtick"] = (x_max - x_min) / 4
        if y_max > y_min:
            yaxis["dtick"] = (y_max - y_min) / 4
    else:
        # Nothing to derive a range from: keep only the reversed orientation.
        yaxis["autorange"] = "reversed"

    fig.update_layout(
        title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
        width=800,
        height=600,
        showlegend=True,
        xaxis=xaxis,
        yaxis=yaxis,
    )

    return fig
|
487 |
|
488 |
|
|
|
525 |
last_period = time_list[-1]
|
526 |
|
527 |
initial_fig = create_scaling_plot(all_data, last_period)
|
|
|
|
|
528 |
initial_metric = metric_list[0]
|
529 |
initial_columns = get_unique_column_names(all_data)
|
530 |
+
# initial_columns = initial_columns[:-1]
|
|
|
531 |
initial_colors = ["Average", "Individual Tests"]
|
532 |
initial_size_range = [0, 15]
|
533 |
+
initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
|
534 |
|
535 |
css = """
|
536 |
.gradio-container {
|
537 |
max-width: 95% !important;
|
538 |
+
margin: 0 auto;
|
539 |
}
|
540 |
.tab-buttons button {
|
541 |
font-size: 1.3em;
|
|
|
544 |
white-space: normal;
|
545 |
word-break: break-word;
|
546 |
}
|
547 |
+
table {
|
548 |
+
margin-left: auto !important;
|
549 |
+
margin-right: auto !important;
|
550 |
+
width: 100% !important;
|
551 |
+
}
|
552 |
"""
|
553 |
|
554 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
|
|
564 |
period_selector = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
565 |
model_selector = gr.CheckboxGroup(label="Model Size", choices=model_size_list, value=model_size_list)
|
566 |
size_range_slider = RangeSlider(minimum=0, maximum=15, value=[0, 15], step=0.1, label="Model Size Range")
|
567 |
+
metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=initial_metric)
|
568 |
with gr.Column():
|
569 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
570 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
571 |
|
572 |
+
# table = gr.Dataframe(
|
573 |
+
# initial_data,
|
574 |
+
# column_widths=[130, 50, 50, 35, 35, 35, 35, 35, 35, 35, 35],
|
575 |
+
# wrap=True,
|
576 |
+
# max_height=800,
|
577 |
+
# )
|
578 |
+
table = gr.HTML(initial_data)
|
579 |
|
580 |
period_selector.change(
|
581 |
update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
|
|
|
599 |
with gr.Tab("🌍 MultiLang"):
|
600 |
gr.Markdown("## Coming soon...")
|
601 |
world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
602 |
+
|
603 |
with gr.Tab("📈 Scaling Law"):
|
604 |
print(time_list)
|
605 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
|
|
613 |
|
614 |
with gr.Tab("ℹ️ About"):
|
615 |
gr.Markdown(about_md)
|
616 |
+
|
617 |
with gr.Tab("🚀 Submit"):
|
618 |
with gr.Group():
|
619 |
with gr.Row():
|
620 |
model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
|
621 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
622 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
|
|
623 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
624 |
|
625 |
demo.launch(share=False)
|
data/2024-10/7b.xlsx
CHANGED
Binary files a/data/2024-10/7b.xlsx and b/data/2024-10/7b.xlsx differ
|
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
plotly==5.22.0
|
2 |
scikit-learn==1.5.0
|
3 |
-
gradio==
|
4 |
huggingface_hub==0.23.1
|
5 |
pandas==2.2.2
|
6 |
openpyxl==3.1.2
|
|
|
1 |
plotly==5.22.0
|
2 |
scikit-learn==1.5.0
|
3 |
+
gradio==5.15.0
|
4 |
huggingface_hub==0.23.1
|
5 |
pandas==2.2.2
|
6 |
openpyxl==3.1.2
|