OpenSourceRonin committed
Commit a005089 · verified · 1 Parent(s): 0ec2418

Update app.py

Files changed (1):
  1. app.py +1 -13
app.py CHANGED
@@ -16,26 +16,14 @@ models = [
         "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft",
         "bits": "3 bits"
     },
-    {
-        "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-65536-woft",
-        "bits": "4 bits"
-    },
     {
         "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft",
         "bits": "2 bits"
     },
-    {
-        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-256-woft",
-        "bits": "3 bits"
-    },
     {
         "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
         "bits": "2 bits"
-    },
-    {
-        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
-        "bits": "3 bits"
-    },
+    }
 ]
 
 # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
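After this commit, the part of the models list visible in the hunk keeps only the entries shown above, each a dict with a "name" (Hugging Face repo id) and a human-readable "bits" label. As a hedged sketch only (the helper name model_choices and the label format are assumptions, not taken from app.py), a list of this shape is typically flattened into labeled choices for a model selector like so:

# Hypothetical helper (not from app.py): build display labels such as
# "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft (2 bits)"
# from the entries that remain in this hunk after the commit.
models = [
    {"name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-256-woft", "bits": "3 bits"},
    {"name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft", "bits": "2 bits"},
    {"name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft", "bits": "2 bits"},
]

def model_choices(entries):
    """Return (label, repo_id) pairs suitable for a dropdown-style selector."""
    return [(f"{m['name']} ({m['bits']})", m["name"]) for m in entries]

print(model_choices(models))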
 
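The trailing context line of the hunk mentions queues that keep the last 100 GPU utilization and memory usage readings. A minimal sketch of that pattern, assuming bounded deques (the variable names, the record_sample function, and the simulated samples below are illustrative assumptions, not the Space's actual code):

# Hypothetical sketch (not from app.py): bounded histories that retain only
# the most recent 100 GPU utilization and memory-usage samples.
from collections import deque

HISTORY_LEN = 100  # "last 100 ... values" from the comment above

gpu_util_history = deque(maxlen=HISTORY_LEN)  # recent utilization percentages
gpu_mem_history = deque(maxlen=HISTORY_LEN)   # recent memory-usage values (MiB)

def record_sample(util_percent: float, mem_mib: float) -> None:
    """Append one reading; deque(maxlen=...) silently drops the oldest entry."""
    gpu_util_history.append(util_percent)
    gpu_mem_history.append(mem_mib)

# Example: after 150 simulated samples, only the latest 100 remain.
for i in range(150):
    record_sample(util_percent=float(i % 100), mem_mib=1024.0 + i)
assert len(gpu_util_history) == HISTORY_LEN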