Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
chriscanal
committed on
Commit
•
e872e8a
1
Parent(s):
65fc294
Updated main to include title in the graph function parameters
Browse files
app.py
CHANGED
@@ -105,7 +105,6 @@ else:
|
|
105 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
106 |
models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their cards, pointing to the leaderboard
|
107 |
plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
|
108 |
-
|
109 |
to_be_dumped = f"models = {repr(models)}\n"
|
110 |
|
111 |
# with open("models_backlinks.py", "w") as f:
|
@@ -476,16 +475,21 @@ with demo:
|
|
476 |
with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
|
477 |
with gr.Row():
|
478 |
with gr.Column():
|
479 |
-
chart = create_metric_plot_obj(
|
480 |
-
|
|
|
|
|
|
|
481 |
)
|
482 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
483 |
with gr.Column():
|
484 |
chart = create_metric_plot_obj(
|
485 |
-
plot_df,
|
486 |
-
|
|
|
|
|
|
|
487 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
488 |
-
|
489 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
490 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
491 |
|
@@ -608,4 +612,4 @@ with demo:
|
|
608 |
scheduler = BackgroundScheduler()
|
609 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
610 |
scheduler.start()
|
611 |
-
demo.queue(concurrency_count=40).launch()
|
|
|
105 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
106 |
models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their cards, pointing to the leaderboard
|
107 |
plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
|
|
|
108 |
to_be_dumped = f"models = {repr(models)}\n"
|
109 |
|
110 |
# with open("models_backlinks.py", "w") as f:
|
|
|
475 |
with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
|
476 |
with gr.Row():
|
477 |
with gr.Column():
|
478 |
+
chart = create_metric_plot_obj(
|
479 |
+
plot_df,
|
480 |
+
["Average β¬οΈ"],
|
481 |
+
HUMAN_BASELINES,
|
482 |
+
title="Average of Top Scores and Human Baseline Over Time",
|
483 |
)
|
484 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
485 |
with gr.Column():
|
486 |
chart = create_metric_plot_obj(
|
487 |
+
plot_df,
|
488 |
+
["ARC", "HellaSwag", "MMLU", "TruthfulQA"],
|
489 |
+
HUMAN_BASELINES,
|
490 |
+
title="Top Scores and Human Baseline Over Time",
|
491 |
+
)
|
492 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
|
|
493 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
494 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
495 |
|
|
|
612 |
scheduler = BackgroundScheduler()
|
613 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
614 |
scheduler.start()
|
615 |
+
demo.queue(concurrency_count=40).launch()
|