Julien Simon committed on
Commit b63ff12
1 Parent(s): 6094dcf
Files changed (5)
  1. app.py +1 -1
  2. results.py +1 -0
  3. results_arcee_meraj.py +24 -0
  4. results_arcee_nova.py +23 -0
  5. results_llama_spark.py +36 -10
app.py CHANGED
@@ -202,7 +202,7 @@ with gr.Blocks() as demo:
         """This table shows the benchmark results for each model. \n\n
         Configurations are default unless noted.
         [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
-        [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html), etc.)are default unless noted."""
+        [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
     )
     model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
 
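For context, the edited Markdown block presumably sits inside the Gradio UI defined in app.py. Below is a minimal, hypothetical sketch of how such a dropdown-plus-table layout could be wired; get_model_names() and update_table() are illustrative stand-ins, not the actual code in this repository.

# Hypothetical sketch of the surrounding Gradio wiring (not the repository's code):
# a dropdown of model names drives a results table. Uses only the public Gradio
# API (gr.Blocks, gr.Markdown, gr.Dropdown, gr.Dataframe, .change).
import gradio as gr

def get_model_names():
    # Stand-in: app.py presumably builds this list from the results_*.py modules.
    return ["Arcee-Meraj", "Arcee-Nova", "Llama-Spark"]

def update_table(model_name):
    # Stand-in: return rows for the selected model.
    return [[model_name, "g6e.12xlarge", "vLLM 0.5.5", "45"]]

with gr.Blocks() as demo:
    gr.Markdown(
        """This table shows the benchmark results for each model. \n\n
        Configurations are default unless noted.
        [TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher),
        [vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html)"""
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    table = gr.Dataframe(headers=["Model", "Instance", "Container", "Tokens/s"])
    model_dropdown.change(fn=update_table, inputs=model_dropdown, outputs=table)

demo.launch()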
results.py CHANGED
@@ -18,6 +18,7 @@ instance_type_mappings = {
     "g6.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA L4", "gpuRAM": "96 GB"},
     "g6.48xlarge": {"cloud": "AWS", "gpu": "8xNVIDIA L4", "gpuRAM": "192 GB"},
     "g6e.2xlarge": {"cloud": "AWS", "gpu": "1xNVIDIA L40S", "gpuRAM": "48 GB"},
+    "g6e.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA L40S", "gpuRAM": "192 GB"},
     "g4dn.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA T4", "gpuRAM": "64 GB"},
     "p4d.24xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA A100", "gpuRAM": "320 GB"},
     "p4de.24xlarge": {"cloud": "AWS", "gpu": "8xNVIDIA A100", "gpuRAM": "320 GB"},
results_arcee_meraj.py CHANGED
@@ -12,6 +12,30 @@ results_arcee_meraj = {
         "tokensPerSecond": "33",
         "notes": "",
     },
+    {
+        "instanceType": "g6e.12xlarge",
+        "quantization": "awq",
+        "container": "vLLM 0.5.5",
+        "status": "OK",
+        "tokensPerSecond": "45",
+        "notes": "",
+    },
+    {
+        "instanceType": "g6e.12xlarge",
+        "quantization": "awq",
+        "container": "TGI 2.2.0",
+        "status": "OK",
+        "tokensPerSecond": "46",
+        "notes": "",
+    },
+    {
+        "instanceType": "g6e.12xlarge",
+        "quantization": "awq",
+        "container": "SGLang 0.2.13",
+        "status": "OK",
+        "tokensPerSecond": "47.1",
+        "notes": "",
+    },
     {
         "instanceType": "p4d.24xlarge",
         "quantization": "none",
results_arcee_nova.py CHANGED
@@ -108,6 +108,29 @@ results_arcee_nova = {
         "status": "OK",
         "tokensPerSecond": "12",
     },
+    {
+        "instanceType": "g6e.12xlarge",
+        "configurations": [
+            {
+                "quantization": "none",
+                "container": "TGI 2.2.0",
+                "status": "OK",
+                "tokensPerSecond": "17",
+            },
+            {
+                "quantization": "none",
+                "container": "vLLM 0.5.5",
+                "status": "OK",
+                "tokensPerSecond": "17.8",
+            },
+            {
+                "quantization": "none",
+                "container": "SGLang 0.2.13",
+                "status": "OK",
+                "tokensPerSecond": "18.2",
+            },
+        ],
+    },
     {
         "instanceType": "p4d.24xlarge",
         "quantization": "none",
results_llama_spark.py CHANGED
@@ -46,22 +46,48 @@ results_llama_spark = {
     },
     {
         "instanceType": "g6e.2xlarge",
-        "quantization": "none",
-        "status": "OK",
         "configurations": [
-            {"container": "TGI 2.2.0", "tokensPerSecond": "42.1"},
-            {"container": "SGLang 0.2.13", "tokensPerSecond": "45"},
-            {"container": "vLLM 0.5.5", "tokensPerSecond": "43.4"},
+            {
+                "container": "TGI 2.2.0",
+                "quantization": "none",
+                "status": "OK",
+                "tokensPerSecond": "42.1",
+            },
+            {
+                "container": "SGLang 0.2.13",
+                "quantization": "none",
+                "status": "OK",
+                "tokensPerSecond": "45",
+            },
+            {
+                "container": "vLLM 0.5.5",
+                "quantization": "none",
+                "status": "OK",
+                "tokensPerSecond": "43.4",
+            },
         ],
     },
     {
         "instanceType": "g6e.12xlarge",
-        "quantization": "none",
-        "status": "OK",
         "configurations": [
-            {"container": "TGI 2.2.0", "tokensPerSecond": "112"},
-            {"container": "SGLang 0.2.13", "tokensPerSecond": "123"},
-            {"container": "vLLM 0.5.5", "tokensPerSecond": "106"},
+            {
+                "container": "TGI 2.2.0",
+                "quantization": "none",
+                "status": "OK",
+                "tokensPerSecond": "112",
+            },
+            {
+                "container": "SGLang 0.2.13",
+                "quantization": "none",
+                "status": "OK",
+                "tokensPerSecond": "123",
+            },
+            {
+                "container": "vLLM 0.5.5",
+                "quantization": "none",
+                "status": "OK",
+                "tokensPerSecond": "106",
+            },
         ],
     },
     {
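Taken together, the results_*.py changes leave two record shapes in play: flat entries that carry "container" and "quantization" at the top level (results_arcee_meraj.py) and entries that nest per-container results under a "configurations" list (results_arcee_nova.py, results_llama_spark.py). Below is a small, hypothetical sketch of how a consumer could normalize both shapes into uniform rows and attach the GPU details from instance_type_mappings; the helper name, row format, and sample data layout are assumptions, not code from the repository.

# Hypothetical normalization sketch: flatten both record shapes into uniform rows
# and attach GPU details from instance_type_mappings (subset copied from results.py).
instance_type_mappings = {
    "g6e.2xlarge": {"cloud": "AWS", "gpu": "1xNVIDIA L40S", "gpuRAM": "48 GB"},
    "g6e.12xlarge": {"cloud": "AWS", "gpu": "4xNVIDIA L40S", "gpuRAM": "192 GB"},
}

def flatten(entries):
    rows = []
    for entry in entries:
        gpu = instance_type_mappings.get(entry["instanceType"], {})
        # Nested entries carry a "configurations" list; flat entries are their own config.
        for cfg in entry.get("configurations", [entry]):
            rows.append({
                "instanceType": entry["instanceType"],
                "gpu": gpu.get("gpu", "unknown"),
                "container": cfg.get("container", ""),
                "quantization": cfg.get("quantization", entry.get("quantization", "")),
                "tokensPerSecond": cfg.get("tokensPerSecond", ""),
            })
    return rows

sample = [
    # Flat shape (results_arcee_meraj.py style)
    {"instanceType": "g6e.12xlarge", "quantization": "awq",
     "container": "SGLang 0.2.13", "status": "OK", "tokensPerSecond": "47.1"},
    # Nested shape (results_arcee_nova.py / results_llama_spark.py style)
    {"instanceType": "g6e.12xlarge", "configurations": [
        {"quantization": "none", "container": "vLLM 0.5.5",
         "status": "OK", "tokensPerSecond": "17.8"},
    ]},
]
print(flatten(sample))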