MekkCyber committed · Commit 1c806d1
Parent(s): f71fb6d
add zero gpu
app.py CHANGED

@@ -6,6 +6,7 @@ from huggingface_hub import HfApi
 from huggingface_hub import list_models
 from packaging import version
 import os
+import spaces
 
 def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
     # ^ expect a gr.OAuthProfile object as input to get the user's profile
@@ -59,6 +60,7 @@ model = AutoModel.from_pretrained("{model_name}")"""
 
     return model_card
 
+@spaces.GPU
 def quantize_model(model_name, quantization_type, group_size=128, auth_token=None, username=None, device="cuda"):
     print(f"Quantizing model: {quantization_type}")
     if quantization_type == "int4_weight_only" :
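
The two added lines enable ZeroGPU for this Space: `import spaces` pulls in the Hugging Face `spaces` package, and decorating `quantize_model` with `@spaces.GPU` makes the Space attach a GPU only while that function runs, so the app can live on shared ZeroGPU hardware instead of a dedicated GPU. A minimal sketch of the same pattern, assuming a Gradio app on ZeroGPU hardware (the `check_cuda` demo function is illustrative and not part of this commit):

import gradio as gr
import spaces  # Hugging Face ZeroGPU support package
import torch

@spaces.GPU  # a GPU is attached only for the duration of this call
def check_cuda() -> str:
    # Inside the decorated function, CUDA is available on ZeroGPU hardware
    return f"CUDA available: {torch.cuda.is_available()}"

demo = gr.Interface(fn=check_cuda, inputs=None, outputs="text")

if __name__ == "__main__":
    demo.launch()

For longer jobs such as quantizing a model, the decorator also accepts a time budget, e.g. `@spaces.GPU(duration=120)`, which requests the GPU for up to that many seconds per call.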