Spaces:
Running
Running
Julien Simon
committed on
Commit
•
b63ff12
1
Parent(s):
6094dcf
Update
Browse files- app.py +1 -1
- results.py +1 -0
- results_arcee_meraj.py +24 -0
- results_arcee_nova.py +23 -0
- results_llama_spark.py +36 -10
app.py
CHANGED
@@ -202,7 +202,7 @@ with gr.Blocks() as demo:
|
|
202 |
"""This table shows the benchmark results for each model. \n\n
|
203 |
Configurations are default unless noted.
|
204 |
[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
|
205 |
-
[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)
|
206 |
)
|
207 |
model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
|
208 |
|
|
|
202 |
"""This table shows the benchmark results for each model. \n\n
|
203 |
Configurations are default unless noted.
|
204 |
[TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
|
205 |
+
[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
|
206 |
)
|
207 |
model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
|
208 |
|
results.py
CHANGED
@@ -18,6 +18,7 @@ instance_type_mappings = {
|
|
18 |
"g6.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA L4", "gpuRAM": "96 GB"},
|
19 |
"g6.48xlarge": {"cloud": "AWS", "gpu": "8xNVIDIA L4", "gpuRAM": "192 GB"},
|
20 |
"g6e.2xlarge": {"cloud": "AWS", "gpu": "1xNVIDIA L40S", "gpuRAM": "48 GB"},
|
|
|
21 |
"g4dn.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA T4", "gpuRAM": "64 GB"},
|
22 |
"p4d.24xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA A100", "gpuRAM": "320 GB"},
|
23 |
"p4de.24xlarge": {"cloud": "AWS", "gpu": "8xNVIDIA A100", "gpuRAM": "320 GB"},
|
|
|
18 |
"g6.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA L4", "gpuRAM": "96 GB"},
|
19 |
"g6.48xlarge": {"cloud": "AWS", "gpu": "8xNVIDIA L4", "gpuRAM": "192 GB"},
|
20 |
"g6e.2xlarge": {"cloud": "AWS", "gpu": "1xNVIDIA L40S", "gpuRAM": "48 GB"},
|
21 |
+
"g6e.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA L40S", "gpuRAM": "192 GB"},
|
22 |
"g4dn.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA T4", "gpuRAM": "64 GB"},
|
23 |
"p4d.24xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA A100", "gpuRAM": "320 GB"},
|
24 |
"p4de.24xlarge": {"cloud": "AWS", "gpu": "8xNVIDIA A100", "gpuRAM": "320 GB"},
|
results_arcee_meraj.py
CHANGED
@@ -12,6 +12,30 @@ results_arcee_meraj = {
|
|
12 |
"tokensPerSecond": "33",
|
13 |
"notes": "",
|
14 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
{
|
16 |
"instanceType": "p4d.24xlarge",
|
17 |
"quantization": "none",
|
|
|
12 |
"tokensPerSecond": "33",
|
13 |
"notes": "",
|
14 |
},
|
15 |
+
{
|
16 |
+
"instanceType": "g6e.12xlarge",
|
17 |
+
"quantization": "awq",
|
18 |
+
"container": "vLLM 0.5.5",
|
19 |
+
"status": "OK",
|
20 |
+
"tokensPerSecond": "45",
|
21 |
+
"notes": "",
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"instanceType": "g6e.12xlarge",
|
25 |
+
"quantization": "awq",
|
26 |
+
"container": "TGI 2.2.0",
|
27 |
+
"status": "OK",
|
28 |
+
"tokensPerSecond": "46",
|
29 |
+
"notes": "",
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"instanceType": "g6e.12xlarge",
|
33 |
+
"quantization": "awq",
|
34 |
+
"container": "SGLang 0.2.13",
|
35 |
+
"status": "OK",
|
36 |
+
"tokensPerSecond": "47.1",
|
37 |
+
"notes": "",
|
38 |
+
},
|
39 |
{
|
40 |
"instanceType": "p4d.24xlarge",
|
41 |
"quantization": "none",
|
results_arcee_nova.py
CHANGED
@@ -108,6 +108,29 @@ results_arcee_nova = {
|
|
108 |
"status": "OK",
|
109 |
"tokensPerSecond": "12",
|
110 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
{
|
112 |
"instanceType": "p4d.24xlarge",
|
113 |
"quantization": "none",
|
|
|
108 |
"status": "OK",
|
109 |
"tokensPerSecond": "12",
|
110 |
},
|
111 |
+
{
|
112 |
+
"instanceType": "g6e.12xlarge",
|
113 |
+
"configurations": [
|
114 |
+
{
|
115 |
+
"quantization": "none",
|
116 |
+
"container": "TGI 2.2.0",
|
117 |
+
"status": "OK",
|
118 |
+
"tokensPerSecond": "17",
|
119 |
+
},
|
120 |
+
{
|
121 |
+
"quantization": "none",
|
122 |
+
"container": "vLLM 0.5.5",
|
123 |
+
"status": "OK",
|
124 |
+
"tokensPerSecond": "17.8",
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"quantization": "none",
|
128 |
+
"container": "SGLang 0.2.13",
|
129 |
+
"status": "OK",
|
130 |
+
"tokensPerSecond": "18.2",
|
131 |
+
},
|
132 |
+
],
|
133 |
+
},
|
134 |
{
|
135 |
"instanceType": "p4d.24xlarge",
|
136 |
"quantization": "none",
|
results_llama_spark.py
CHANGED
@@ -46,22 +46,48 @@ results_llama_spark = {
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "g6e.2xlarge",
|
49 |
-
"quantization": "none",
|
50 |
-
"status": "OK",
|
51 |
"configurations": [
|
52 |
-
{
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
],
|
56 |
},
|
57 |
{
|
58 |
"instanceType": "g6e.12xlarge",
|
59 |
-
"quantization": "none",
|
60 |
-
"status": "OK",
|
61 |
"configurations": [
|
62 |
-
{
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
],
|
66 |
},
|
67 |
{
|
|
|
46 |
},
|
47 |
{
|
48 |
"instanceType": "g6e.2xlarge",
|
|
|
|
|
49 |
"configurations": [
|
50 |
+
{
|
51 |
+
"container": "TGI 2.2.0",
|
52 |
+
"quantization": "none",
|
53 |
+
"status": "OK",
|
54 |
+
"tokensPerSecond": "42.1",
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"container": "SGLang 0.2.13",
|
58 |
+
"quantization": "none",
|
59 |
+
"status": "OK",
|
60 |
+
"tokensPerSecond": "45",
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"container": "vLLM 0.5.5",
|
64 |
+
"quantization": "none",
|
65 |
+
"status": "OK",
|
66 |
+
"tokensPerSecond": "43.4",
|
67 |
+
},
|
68 |
],
|
69 |
},
|
70 |
{
|
71 |
"instanceType": "g6e.12xlarge",
|
|
|
|
|
72 |
"configurations": [
|
73 |
+
{
|
74 |
+
"container": "TGI 2.2.0",
|
75 |
+
"quantization": "none",
|
76 |
+
"status": "OK",
|
77 |
+
"tokensPerSecond": "112",
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"container": "SGLang 0.2.13",
|
81 |
+
"quantization": "none",
|
82 |
+
"status": "OK",
|
83 |
+
"tokensPerSecond": "123",
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"container": "vLLM 0.5.5",
|
87 |
+
"quantization": "none",
|
88 |
+
"status": "OK",
|
89 |
+
"tokensPerSecond": "106",
|
90 |
+
},
|
91 |
],
|
92 |
},
|
93 |
{
|