File size: 3,967 Bytes
1d4a48e
6f3a090
3c37eb3
c8763bd
ab5f5f1
341eaa4
bee5389
ad5bd56
bd9edb7
 
c8763bd
9dc4521
ab5f5f1
e747f4e
c382b2a
9e3eaf4
a1135a9
d574374
a830adb
df1a500
67b4a03
 
ab5f5f1
483e3a1
 
 
ab5f5f1
 
 
 
a1135a9
ab5f5f1
 
 
 
 
a1135a9
 
 
 
 
 
 
ab5f5f1
 
 
 
 
483e3a1
a1135a9
483e3a1
 
 
 
 
 
 
a1135a9
483e3a1
 
ab5f5f1
a1135a9
 
ab5f5f1
 
483e3a1
 
a1135a9
483e3a1
 
ab5f5f1
483e3a1
f3dc796
ab5f5f1
483e3a1
 
f3dc796
 
a1135a9
 
ab5f5f1
 
483e3a1
 
 
 
bee5389
ab5f5f1
2ff4a74
3c37eb3
9dc4521
 
bee5389
9dc4521
2ff4a74
00642fb
ad5bd56
9dc4521
bee5389
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# HTML <img> tag pointing at the Optimum-Benchmark logo hosted on GitHub.
LOGO = (
    '<img src="'
    "https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/logo.png"
    '">'
)

# Centered HTML <h1> heading for the leaderboard page.
# The emoji were previously stored as mojibake (UTF-8 bytes decoded with the
# wrong code page); they are restored to the intended characters here.
TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""

# Markdown introduction: what the leaderboard measures and where to file
# benchmarking requests. The emoji were previously stored as mojibake and are
# restored to the intended characters; all other text is unchanged.
INTRODUCTION = """
The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput, memory & energy) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.

Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) automatically.
- Hardware/Backend/Optimization performance requests should be made in the [llm-perf-backend repository](https://github.com/IlyasMoutawwakil/llm-perf-backend) and will be added to the [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) automatically.
"""

# HTML "About" section listing the benchmarking conditions (single GPU, batch
# and prompt sizes, energy and memory measurement methods).
# Fixes relative to the previous text: mojibake emoji restored, and the
# grammar slip "generating a 256 tokens" corrected.
ABOUT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
<ul>
    <li>To avoid communication-dependent results, only one GPU is used.</li>
    <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
    <li>LLMs are running on a singleton batch with a prompt size of 256 and generating 256 tokens.</li>
    <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
    <li>We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two being reported by PyTorch and the last one being observed using PyNVML.</li>
</ul>
"""

# YAML for one sample experiment (PyTorch backend, CUDA, float16, 4-bit GPTQ
# with ExLlama v1). The leading backslash suppresses the first newline so the
# fragment starts directly at "defaults:".
_EXPERIMENT_CONFIG_YAML = """\
defaults:
  - backend: pytorch
  - _base_ # inheriting from base config
  - _self_ # for hydra 1.1 compatibility

experiment_name: pytorch+cuda+float16+gptq-4bit+exllama-v1
device: cuda

backend:
  no_weights: true
  torch_dtype: float16
  quantization_scheme: gptq
  quantization_config:
    bits: 4
    use_cuda_fp16: false
    use_exllama: true
    exllama_config:
      version: 1
"""

# YAML for the shared base config that the experiment above inherits from.
_BASE_CONFIG_YAML = """\
defaults:
  - benchmark: inference # default benchmark
  - launcher: process # isolated process launcher
  - experiment # inheriting from experiment config
  - _self_ # for hydra 1.1 compatibility
  - override hydra/job_logging: colorlog # colorful logging
  - override hydra/hydra_logging: colorlog # colorful logging

hydra:
  run:
    dir: dataset/${oc.env:HOSTNAME}/${experiment_name}/${model}
  job:
    chdir: true
    env_set:
      COUNTRY_ISO_CODE: FRA
      OVERRIDE_BENCHMARKS: 0
      CUDA_VISIBLE_DEVICES: 0
      CUDA_DEVICE_ORDER: PCI_BUS_ID

backend:
  continuous_isolation: true

benchmark:
  duration: 10
  memory: true
  energy: true

  input_shapes:
    batch_size: 1
    sequence_length: 256

  new_tokens: 256

hub_kwargs:
  trust_remote_code: true
"""

# Markdown text showing both configs inside fenced ```yaml blocks. Adjacent
# literals are joined at compile time; "+" splices in the two YAML fragments.
EXAMPLE_CONFIG = (
    "\n"
    "Here's an example of the configuration file used to benchmark the models with Optimum-Benchmark:\n"
    "```yaml\n"
    + _EXPERIMENT_CONFIG_YAML
    + "```\n"
    "\n"
    "Where the base config is:\n"
    "```yaml\n"
    + _BASE_CONFIG_YAML
    + "```\n"
)


# Label text that accompanies the citation snippet below.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."

# BibTeX snippet with two entries (the leaderboard and Optimum-Benchmark).
# Kept as a raw string so the backslash in "\url" is emitted literally.
# Fixes relative to the previous snippet:
#   - the @misc entry now has its closing brace (it was left unterminated),
#   - authors are joined with " and ", the BibTeX separator for multiple
#     names (a comma makes BibTeX read a single "Last, First" name),
#   - the mis-encoded "é" in "Régis" is restored.
CITATION_BUTTON = r"""@misc{llm-perf-leaderboard,
  author = {Ilyas Moutawwakil and Régis Pierrard},
  title = {LLM-Perf Leaderboard},
  year = {2023},
  publisher = {Hugging Face},
  howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
}
@software{optimum-benchmark,
  author = {Ilyas Moutawwakil and Régis Pierrard},
  publisher = {Hugging Face},
  title = {Optimum-Benchmark: A framework for benchmarking the performance of Transformers models with different hardwares, backends and optimizations.},
}
"""