Spaces:

medmekk
/

TorchAO_Quantization

Running on A100

App Files Community

MekkCyber committed on Oct 22, 2024

Commit

f71fb6d

1 Parent(s): e5bb0c6

update repo name

Browse files

Files changed (1) hide show

app.py +9 -5

app.py CHANGED Viewed

@@ -73,9 +73,7 @@ def quantize_model(model_name, quantization_type, group_size=128, auth_token=Non
 def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
-        model_card = create_model_card(model_name, quantization_type, group_size)
-        with open(os.path.join(tmpdirname, "README.md"), "w") as f:
-            f.write(model_card)
         model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
         if quantized_model_name :
@@ -86,6 +84,9 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
             else :
                 repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
         # Push to Hub
         api = HfApi(token=auth_token.token)
         api.create_repo(repo_name, exist_ok=True)
@@ -107,8 +108,11 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
         return exists_message
     if quantization_type == "int4_weight_only" and device == "cpu" :
         return "int4_weight_only not supported on cpu"
-    quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
-    return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
 with gr.Blocks(theme=gr.themes.Soft()) as app:

 def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
         model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
         if quantized_model_name :
             else :
                 repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
+        model_card = create_model_card(repo_name, quantization_type, group_size)
+        with open(os.path.join(tmpdirname, "README.md"), "w") as f:
+            f.write(model_card)
         # Push to Hub
         api = HfApi(token=auth_token.token)
         api.create_repo(repo_name, exist_ok=True)
         return exists_message
     if quantization_type == "int4_weight_only" and device == "cpu" :
         return "int4_weight_only not supported on cpu"
+    try :
+        quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
+        return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
+    except Exception as e :
+        return e
 with gr.Blocks(theme=gr.themes.Soft()) as app: