Spaces:
Running
on
A100
Running
on
A100
MekkCyber
committed on
Commit
·
f71fb6d
1
Parent(s):
e5bb0c6
update repo name
Browse files
app.py
CHANGED
@@ -73,9 +73,7 @@ def quantize_model(model_name, quantization_type, group_size=128, auth_token=Non
|
|
73 |
def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
|
74 |
print("Saving quantized model")
|
75 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
76 |
-
|
77 |
-
with open(os.path.join(tmpdirname, "README.md"), "w") as f:
|
78 |
-
f.write(model_card)
|
79 |
|
80 |
model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
|
81 |
if quantized_model_name :
|
@@ -86,6 +84,9 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
|
|
86 |
else :
|
87 |
repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
|
88 |
|
|
|
|
|
|
|
89 |
# Push to Hub
|
90 |
api = HfApi(token=auth_token.token)
|
91 |
api.create_repo(repo_name, exist_ok=True)
|
@@ -107,8 +108,11 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
|
|
107 |
return exists_message
|
108 |
if quantization_type == "int4_weight_only" and device == "cpu" :
|
109 |
return "int4_weight_only not supported on cpu"
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
112 |
|
113 |
|
114 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
|
73 |
def save_model(model, model_name, quantization_type, group_size=128, username=None, auth_token=None, quantized_model_name=None):
|
74 |
print("Saving quantized model")
|
75 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
76 |
+
|
|
|
|
|
77 |
|
78 |
model.save_pretrained(tmpdirname, safe_serialization=False, use_auth_token=auth_token.token)
|
79 |
if quantized_model_name :
|
|
|
84 |
else :
|
85 |
repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
|
86 |
|
87 |
+
model_card = create_model_card(repo_name, quantization_type, group_size)
|
88 |
+
with open(os.path.join(tmpdirname, "README.md"), "w") as f:
|
89 |
+
f.write(model_card)
|
90 |
# Push to Hub
|
91 |
api = HfApi(token=auth_token.token)
|
92 |
api.create_repo(repo_name, exist_ok=True)
|
|
|
108 |
return exists_message
|
109 |
if quantization_type == "int4_weight_only" and device == "cpu" :
|
110 |
return "int4_weight_only not supported on cpu"
|
111 |
+
try :
|
112 |
+
quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
|
113 |
+
return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
|
114 |
+
except Exception as e :
|
115 |
+
return e
|
116 |
|
117 |
|
118 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|