BenchmarkBot committed • Commit 3c37eb3
1 Parent(s): e2e1ee9

move things around

Files changed:
- app.py (+16 -21)
- src/assets/text_content.py (+5 -5)
app.py CHANGED
@@ -27,13 +27,13 @@ LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
 OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 ALL_COLUMNS_MAPPING = {
-    "weight_class": "Class 🏋️",
-    "model_type": "Type 🤗",
-    #
     "backend.name": "Backend 🏭",
     "backend.torch_dtype": "Dtype 🔥",
-    "quantization": "Quantization 🗜️",
     "optimizations": "Optimizations 🛠️",
+    "quantization": "Quantization 🗜️",
+    #
+    "weight_class": "Class 🏋️",
+    "model_type": "Type 🤗",
     #
     "generate.peak_memory(MB)": "Memory (MB) ⬇️",
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
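Note: ALL_COLUMNS_MAPPING maps raw dataframe columns to display labels, and its insertion order is exactly what this hunk shuffles. A minimal sketch of why the order matters, on toy data (it assumes the table is built by selecting and renaming with this dict, as the get_benchmark_table hunk further down suggests):

import pandas as pd

# Dicts preserve insertion order (Python 3.7+), so reordering the entries
# of ALL_COLUMNS_MAPPING is enough to reorder the leaderboard's columns.
ALL_COLUMNS_MAPPING = {
    "backend.name": "Backend 🏭",
    "backend.torch_dtype": "Dtype 🔥",
    "optimizations": "Optimizations 🛠️",
    "quantization": "Quantization 🗜️",
}

df = pd.DataFrame([{
    "quantization": "GPTQ.4bit",
    "backend.name": "pytorch",
    "backend.torch_dtype": "float16",
    "optimizations": "None",
}])

# Select the columns in mapping order, then rename them to display labels.
table = df[list(ALL_COLUMNS_MAPPING.keys())].rename(columns=ALL_COLUMNS_MAPPING)
print(list(table.columns))
# ['Backend 🏭', 'Dtype 🔥', 'Optimizations 🛠️', 'Quantization 🗜️']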
@@ -45,9 +45,9 @@ ALL_COLUMNS_MAPPING = {
 ALL_COLUMNS_DATATYPES = [
     "str",
     "str",
-    #
     "str",
     "str",
+    #
     "str",
     "str",
     #
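ALL_COLUMNS_DATATYPES pairs with ALL_COLUMNS_MAPPING by position (such a list is typically passed to something like gr.Dataframe(datatype=...)), which is why the placeholder comment moves in both lists in lockstep. A hypothetical guard, not in the commit, that would catch the two drifting apart:

# Hypothetical sanity check for app.py: the positional datatype list must
# stay aligned with the column mapping, since entries pair up by index only.
assert len(ALL_COLUMNS_DATATYPES) == len(ALL_COLUMNS_MAPPING), (
    f"{len(ALL_COLUMNS_DATATYPES)} datatypes declared for "
    f"{len(ALL_COLUMNS_MAPPING)} columns"
)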
@@ -85,21 +85,16 @@ def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
     merged_df["quantization"] = merged_df["backend.quantization_strategy"].apply(
         lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
     )
-    # distance to 100% score
-    score_distance = …
-    # distance to 0s latency
-    latency_distance = merged_df["generate.latency(s)"]
-    …
-    )
-    #
-    …
-    …
-    …
-    )
-    # add perf distance
-    merged_df["perf_distance"] = (
-        score_distance**2 + latency_distance**2 + memory_distance**2
-    ) ** 0.5
+    # # distance to 100% score
+    # score_distance = 100 - merged_df["best_score"]
+    # # distance to 0s latency
+    # latency_distance = merged_df["generate.latency(s)"]
+    # # distance to 0MB memory
+    # memory_distance = merged_df["forward.peak_memory(MB)"]
+    # # add perf distance
+    # merged_df["perf_distance"] = (
+    #     score_distance**2 + latency_distance**2 + memory_distance**2
+    # ) ** 0.5
 
     return merged_df
 
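The deleted block (several of its lines were lost in page extraction and are marked … above) computed a "perf distance": the Euclidean distance from an ideal point of 100% score, 0 s latency and 0 MB memory; the commit re-lands it as commented-out code. A self-contained sketch of that computation on toy data (column names are from the diff; treating best_score as a 0-100 percentage is an assumption):

import pandas as pd

merged_df = pd.DataFrame({
    "best_score": [62.5, 48.0],
    "generate.latency(s)": [12.3, 7.8],
    "forward.peak_memory(MB)": [15000.0, 8200.0],
    "backend.quantization_strategy": ["gptq", None],
})

# Human-readable quantization label, exactly as in the hunk above.
merged_df["quantization"] = merged_df["backend.quantization_strategy"].apply(
    lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
)

# Straight-line (Euclidean) distance to the ideal point
# (score=100, latency=0, memory=0); smaller is better.
score_distance = 100 - merged_df["best_score"]
latency_distance = merged_df["generate.latency(s)"]
memory_distance = merged_df["forward.peak_memory(MB)"]
merged_df["perf_distance"] = (
    score_distance**2 + latency_distance**2 + memory_distance**2
) ** 0.5

print(merged_df[["quantization", "perf_distance"]])

Note that the three axes are on very different scales, so the memory term dominates the sum unless the distances are normalized first; that may be why the metric is disabled here, though the commit message gives no reason.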
@@ -121,7 +116,7 @@ def get_benchmark_table(bench_df):
     # rename
     copy_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    copy_df["…
+    copy_df["Type 🤗"] = copy_df["Type 🤗"].apply(process_model_type)
     copy_df["Best Scored LLM 🏆"] = copy_df["Best Scored LLM 🏆"].apply(
         process_model_name
     )
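process_model_type and process_model_name are defined elsewhere in the Space and are not part of this diff; the stand-ins below are purely hypothetical illustrations of the kind of display transforms leaderboards apply at this step:

def process_model_name(model_name: str) -> str:
    # Hypothetical: render the model id as a markdown link to its Hub page.
    return f"[{model_name}](https://huggingface.co/{model_name})"


def process_model_type(model_type: str) -> str:
    # Hypothetical: collapse raw architecture names into short display labels.
    return model_type.replace("ForCausalLM", "").replace("LMHeadModel", "")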
src/assets/text_content.py CHANGED
@@ -1,14 +1,14 @@
-TITLE = """<h1 align="center" id="space-title">🤗 …
+TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""
 
 INTRODUCTION_TEXT = f"""
-The 🤗 …
+The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput & memory) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.
 
 Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
-- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 …
+- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 LLM-Perf Leaderboard 🏋️ automatically.
 - Hardware/Backend/Optimization performance requests should be made in the [community discussions](https://huggingface.co/spaces/optimum/llm-perf-leaderboard/discussions) to assess their relevance and feasibility.
 """
 
-ABOUT_TEXT = """<h3>About the 🤗 …
+ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
 <ul>
 <li>To avoid communication-dependent results, only one GPU is used.</li>
 <li>LLMs are evaluated on a singleton batch with a prompt size of 512 and generating 1000 tokens.</li>
@@ -63,7 +63,7 @@ benchmark:
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
 CITATION_BUTTON_TEXT = r"""@misc{open-llm-perf-leaderboard,
 author = {Ilyas Moutawwakil, Régis Pierrard},
-title = {…
+title = {LLM-Perf Leaderboard},
 year = {2023},
 publisher = {Hugging Face},
 howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
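For context, strings like these are typically wired into the Space's Gradio UI roughly as follows. This is a sketch under assumptions: the import path matches this repo's layout, the widget choices are guesses (the actual app.py layout code is not in this diff), and Textbox's show_copy_button requires a reasonably recent Gradio:

import gradio as gr

# Assumed import path, matching this repo's layout (src/assets/text_content.py).
from src.assets.text_content import (
    TITLE,
    INTRODUCTION_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
)

with gr.Blocks() as demo:
    gr.HTML(TITLE)  # the <h1> title banner
    gr.Markdown(INTRODUCTION_TEXT)
    with gr.Accordion("Citation", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=7,
            show_copy_button=True,
        )

if __name__ == "__main__":
    demo.launch()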