Spaces:
Running
on
Zero
Running
on
Zero
OpenSourceRonin
committed on
Commit
•
d1789cc
1
Parent(s):
a005089
Update app.py
Browse files
app.py
CHANGED
@@ -12,18 +12,22 @@ from huggingface_hub import snapshot_download
|
|
12 |
from vptq.app_utils import get_chat_loop_generator
|
13 |
|
14 |
models = [
|
|
|
|
|
|
|
|
|
15 |
{
|
16 |
"name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft",
|
17 |
"bits": "3 bits"
|
18 |
},
|
19 |
{
|
20 |
-
"name": "VPTQ-community/Meta-Llama-3.1-
|
21 |
-
"bits": "
|
22 |
},
|
23 |
{
|
24 |
-
"name": "VPTQ-community/
|
25 |
-
"bits": "
|
26 |
-
}
|
27 |
]
|
28 |
|
29 |
# Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
|
|
|
12 |
from vptq.app_utils import get_chat_loop_generator
|
13 |
|
14 |
models = [
|
15 |
+
{
|
16 |
+
"name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v12-k65536-4096-woft",
|
17 |
+
"bits": "2.3 bits"
|
18 |
+
},
|
19 |
{
|
20 |
"name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft",
|
21 |
"bits": "3 bits"
|
22 |
},
|
23 |
{
|
24 |
+
"name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-4096-woft",
|
25 |
+
"bits": "3.5 bits"
|
26 |
},
|
27 |
{
|
28 |
+
"name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft",
|
29 |
+
"bits": "1.85 bits"
|
30 |
+
},
|
31 |
]
|
32 |
|
33 |
# Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
|