BenchmarkBot committed on
Commit 531390e • 1 Parent(s): 483e3a1

remove memory for now because there are errors

Files changed (1)
app.py +22 -22
app.py CHANGED
@@ -40,7 +40,7 @@ ALL_COLUMNS_MAPPING = {
     "optimizations": "Optimizations 🛠️",
     #
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
-    "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
+    # "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
     #
     "best_scored_model": "Best Scored Model 🏆",
     "best_score": "Best Score (%) ⬆️",
@@ -84,9 +84,9 @@ def get_benchmark_df(benchmark="1xA100-80GB"):
     )
 
     # convert peak memory to int
-    merged_df["forward.peak_memory(MB)"] = merged_df["forward.peak_memory(MB)"].apply(
-        lambda x: int(x)
-    )
+    # merged_df["forward.peak_memory(MB)"] = merged_df["forward.peak_memory(MB)"].apply(
+    #     lambda x: int(x)
+    # )
 
     # add optimizations
     merged_df["optimizations"] = merged_df[
@@ -149,13 +149,13 @@ def get_benchmark_plot(bench_df):
         x="generate.latency(s)",
         y="best_score",
         color="model_type",
-        size="forward.peak_memory(MB)",
+        # size="forward.peak_memory(MB)",
         custom_data=[
             "best_scored_model",
             "backend.name",
             "backend.torch_dtype",
             "optimizations",
-            "forward.peak_memory(MB)",
+            # "forward.peak_memory(MB)",
             "generate.throughput(tokens/s)",
         ],
         color_discrete_sequence=px.colors.qualitative.Light24,
@@ -163,7 +163,7 @@
 
     fig.update_layout(
         title={
-            "text": "Model Score vs. Latency vs. Memory",
+            "text": "Model Score vs. Latency",
            "y": 0.95,
            "x": 0.5,
            "xanchor": "center",
@@ -183,8 +183,8 @@ def get_benchmark_plot(bench_df):
            "Backend: %{customdata[1]}",
            "Load Datatype: %{customdata[2]}",
            "Optimizations: %{customdata[3]}",
-            "Peak Memory (MB): %{customdata[4]}",
-            "Throughput (tokens/s): %{customdata[5]}",
+            # "Peak Memory (MB): %{customdata[4]}",
+            "Throughput (tokens/s): %{customdata[4]}",
            "Per 1000 Tokens Latency (s): %{x}",
            "Open LLM Score (%): %{y}",
        ]
@@ -200,7 +200,7 @@ def filter_query(
     datatypes,
     optimizations,
     score,
-    memory,
+    # memory,
     benchmark="1xA100-80GB",
 ):
     raw_df = get_benchmark_df(benchmark=benchmark)
@@ -221,7 +221,7 @@ def filter_query(
            else True
        )
        & (raw_df["best_score"] >= score)
-        & (raw_df["forward.peak_memory(MB)"] <= memory)
+        # & (raw_df["forward.peak_memory(MB)"] <= memory)
    ]
 
    filtered_table = get_benchmark_table(filtered_df)
@@ -291,16 +291,16 @@ with demo:
                    value=0,
                    elem_id="threshold-slider",
                )
-            with gr.Column(scale=1):
-                with gr.Box():
-                    memory_slider = gr.Slider(
-                        label="Peak Memory (MB) 📈",
-                        info="🎚️ Slide to maximum Peak Memory",
-                        minimum=0,
-                        maximum=80 * 1024,
-                        value=80 * 1024,
-                        elem_id="memory-slider",
-                    )
+            # with gr.Column(scale=1):
+            #     with gr.Box():
+            #         memory_slider = gr.Slider(
+            #             label="Peak Memory (MB) 📈",
+            #             info="🎚️ Slide to maximum Peak Memory",
+            #             minimum=0,
+            #             maximum=80 * 1024,
+            #             value=80 * 1024,
+            #             elem_id="memory-slider",
+            #         )
 
    with gr.Row():
        with gr.Column(scale=1):
@@ -352,7 +352,7 @@ with demo:
            datatype_checkboxes,
            optimizations_checkboxes,
            score_slider,
-            memory_slider,
+            # memory_slider,
        ],
        [A100_leaderboard, A100_plotly],
    )
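
Note on the disabled conversion: the bare `int(x)` cast in `get_benchmark_df` raises as soon as a row has no memory measurement, since `int(float("nan"))` is a `ValueError` and `int(None)` is a `TypeError`. The commit message does not say which error was hit, so missing measurements are only the likely cause. Below is a minimal sketch of a NaN-tolerant conversion that could let the column be re-enabled later; the column name comes from the diff, while the helper name and the use of `pd.to_numeric` with a nullable `Int64` cast are illustrative assumptions, not part of this commit.

import pandas as pd

def coerce_peak_memory(df: pd.DataFrame) -> pd.DataFrame:
    # Hypothetical helper, not in app.py: a NaN-tolerant version of the cast
    # this commit disables.
    col = "forward.peak_memory(MB)"
    # errors="coerce" turns unparsable values into NaN instead of raising
    df[col] = pd.to_numeric(df[col], errors="coerce")
    # "Int64" (capital I) is pandas' nullable integer dtype, so rows whose
    # benchmark produced no memory reading survive the cast as <NA>
    df[col] = df[col].round().astype("Int64")
    return df

Even with that in place, the commented-out consumers would still need care: a missing value passed to plotly's `size=` can break the scatter trace, and `NaN <= memory` evaluates to False, so the filter would silently drop such rows (one option being `raw_df[col].fillna(0) <= memory`). That is presumably why the whole memory path was switched off at once rather than patched piecemeal.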