BenchmarkBot committed
Commit 9e3eaf4 • Parent: df1a500

remove about

Files changed (2):
  1. app.py +6 -14
  2. src/assets/text_content.py +5 -12
app.py CHANGED
@@ -9,7 +9,6 @@ from src.assets.text_content import (
     TITLE,
     INTRODUCTION_TEXT,
     A100_TEXT,
-    ABOUT_TEXT,
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
 )
@@ -29,16 +28,14 @@ OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 
 ALL_COLUMNS_MAPPING = {
-    "best_scored_model": "Best Scored Model 🏆",
-    "model_type": "Type 🤗",
-    "weight_class": "Class 🏋️",
+    "best_scored_model": "Best Scored LLM 🏆",
+    "model_type": "LLM Type 🤗",
+    "weight_class": "Weight Class 🏋️",
     #
     "backend.name": "Backend 🏭",
-    "backend.torch_dtype": "Dtype 📥",
+    "backend.torch_dtype": "Load Datatype 📥",
     "optimizations": "Optimizations 🛠️",
     #
-    # "tradeoff": "Tradeoff* ⬇️",
-    #
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
     "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
     "best_score": "Score (%) ⬆️",
@@ -53,8 +50,6 @@ ALL_COLUMNS_DATATYPES = [
     "str",
     "str",
     #
-    # "number",
-    #
     "number",
     "number",
     "number",
@@ -112,8 +107,8 @@ def get_benchmark_table(bench_df):
     # rename
     bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    bench_df["Type 🤗"] = bench_df["Type 🤗"].apply(process_model_type)
-    bench_df["Class 🏋️"] = bench_df["Class 🏋️"].apply(
+    bench_df["LLM Type 🤗"] = bench_df["LLM Type 🤗"].apply(process_model_type)
+    bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(
         process_weight_class
     )
     bench_df["Best Scored Model 🏆"] = bench_df["Best Scored Model 🏆"].apply(
@@ -309,9 +304,6 @@ with demo:
                 elem_id="filter-button",
             )
 
-        with gr.TabItem("❔ About 📖", id=4):
-            gr.HTML(ABOUT_TEXT)
-
     demo.load(
         change_tab,
         A100_tabs,
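For context on the get_benchmark_table hunk above: the function renames raw benchmark columns to their display names via ALL_COLUMNS_MAPPING, then post-processes the renamed columns with pandas .apply. Below is a minimal runnable sketch of that pattern; the helper bodies are assumptions, since the real process_model_type and process_weight_class live elsewhere in app.py and are not part of this diff.

import pandas as pd

# Trimmed-down mapping, mirroring the keys renamed in this commit.
COLUMNS_MAPPING = {
    "model_type": "LLM Type 🤗",
    "weight_class": "Weight Class 🏋️",
}

def process_model_type(model_type: str) -> str:
    # hypothetical body: prettify the raw type string for display
    return model_type.replace("_", " ").title()

def process_weight_class(weight_class: str) -> str:
    # hypothetical body: tag the parameter-count bucket with a unit
    return f"{weight_class}B"

def get_benchmark_table(bench_df: pd.DataFrame) -> pd.DataFrame:
    # rename raw columns to display names, then transform them in place
    bench_df.rename(columns=COLUMNS_MAPPING, inplace=True)
    bench_df["LLM Type 🤗"] = bench_df["LLM Type 🤗"].apply(process_model_type)
    bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(process_weight_class)
    return bench_df

print(get_benchmark_table(pd.DataFrame({"model_type": ["pretrained"], "weight_class": ["7"]})))

Renaming before transforming means the .apply calls must reference the new display names; that is why this commit touches the mapping entries and the transform lines together.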
src/assets/text_content.py CHANGED
@@ -10,18 +10,11 @@ Anyone from the community can request a model or a hardware/backend/optimization
 
 A100_TEXT = """<h3>Single-GPU Benchmark (1xA100):</h3>
 <ul>
-<li>Singleton Batch (1)</li>
-<li>Thousand Tokens (1000)</li>
-</ul>
-"""
-
-ABOUT_TEXT = """<h3>About the benchmarks:</h3>
-<ul>
-<li>The performance benchmarks were obtained using <a href="https://github.com/huggingface/optimum-benchmark">Optimum-Benchmark</a>.</li>
-<li>Throughput is measured in tokens per second when generating 1000 tokens with a batch size of 1.</li>
-<li>Peak memory is measured in MB during the first forward pass of the model (no warmup).</li>
-<li>Open LLM Score is an average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
-<li>Open LLM Tradeoff is the euclidean distance between an LLM and the "perfect LLM" (i.e. 0 latency and 100% accuracy), translating the tradeoff between latency and accuracy.</li>
+<li>LLMs are evaluated on a singleton batch, generating a thousand tokens.</li>
+<li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
+<li>Each pair of (LLM Type, Weight Class) is represented by the best scored LLM. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
+<li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
+<li>Ranking is based on the euclidean distance from the "perfect LLM" (i.e. 0 latency and 100% accuracy).</li>
 </ul>
 """
 
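The last bullet ranks models by euclidean distance from the "perfect LLM" at (0 latency, 100% accuracy). A worked sketch of that distance, assuming both axes are pre-normalized to [0, 1] (the commit does not show the app's actual scaling):

import math

def distance_to_perfect_llm(latency: float, accuracy: float) -> float:
    # Euclidean distance from the "perfect LLM" at (0 latency, 100% accuracy).
    # Assumption: latency and accuracy are both normalized to [0, 1].
    return math.hypot(latency - 0.0, 1.0 - accuracy)

# e.g. normalized latency 0.2 and accuracy 65%:
print(distance_to_perfect_llm(0.2, 0.65))  # ~0.403; lower means closer to perfect, so ranked higher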