Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6 |
import tempfile
|
7 |
import shutil
|
8 |
import urllib
|
9 |
-
from huggingface_hub import whoami, HfApi, hf_hub_download
|
10 |
from huggingface_hub.utils import build_hf_headers, hf_raise_for_status
|
11 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
12 |
|
@@ -15,10 +15,23 @@ ENDPOINT = "https://huggingface.co"
|
|
15 |
|
16 |
REPO_TYPES = ["model", "dataset", "space"]
|
17 |
HF_REPO = os.environ.get("HF_REPO") if os.environ.get("HF_REPO") else "" # set your default repo
|
|
|
|
|
|
|
18 |
REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
|
19 |
|
20 |
-
def
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
hf_token = oauth_token.token
|
23 |
api = HfApi(token=hf_token)
|
24 |
try:
|
@@ -41,7 +54,9 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oa
|
|
41 |
subfolder = subfolder.removesuffix("/")
|
42 |
if auto_dir: subfolder = source_repo
|
43 |
|
44 |
-
if overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token)
|
|
|
|
|
45 |
temp_dir = tempfile.mkdtemp()
|
46 |
api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
|
47 |
for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
|
@@ -67,6 +82,8 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oa
|
|
67 |
|
68 |
repo_url = r.json().get("url")
|
69 |
|
|
|
|
|
70 |
return (
|
71 |
f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
|
72 |
"sp.jpg",
|
@@ -76,41 +93,207 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oa
|
|
76 |
print(e)
|
77 |
raise gr.Error(f"Error occured: {e}")
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
def swap_visibilty(profile: gr.OAuthProfile | None):
|
104 |
return gr.update(elem_classes=["main_ui_logged_in"]) if profile else gr.update(elem_classes=["main_ui_logged_out"])
|
105 |
-
|
106 |
css = '''
|
107 |
.main_ui_logged_out{opacity: 0.3; pointer-events: none}
|
|
|
108 |
'''
|
109 |
with gr.Blocks(css=css) as demo:
|
110 |
gr.LoginButton()
|
111 |
with gr.Column(elem_classes="main_ui_logged_out") as main_ui:
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
demo.load(fn=swap_visibilty, outputs=main_ui)
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
demo.queue()
|
116 |
demo.launch()
|
|
|
6 |
import tempfile
|
7 |
import shutil
|
8 |
import urllib
|
9 |
+
from huggingface_hub import whoami, HfApi, hf_hub_download, RepoCard
|
10 |
from huggingface_hub.utils import build_hf_headers, hf_raise_for_status
|
11 |
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
12 |
|
|
|
15 |
|
16 |
# Repository kinds the UI offers and the duplicate functions accept.
REPO_TYPES = ["model", "dataset", "space"]

# Optional defaults read from the environment; an unset or empty variable
# yields an empty string.
HF_REPO = os.getenv("HF_REPO") or ""  # set your default repo
HF_REPO_PREFIX = os.getenv("HF_REPO_PREFIX") or ""  # set your default repo prefix
HF_REPO_SUFFIX = os.getenv("HF_REPO_SUFFIX") or ""  # set your default repo suffix
HF_USER = os.getenv("HF_USER") or ""  # set your username

# Matches a bare "<owner>/<name>" repo id with nothing before or after it.
REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
|
22 |
|
23 |
+
def remove_repo_tags(repo_id: str, tags: list[str], repo_type: str, hf_token: str):
    """Best-effort removal of the given tags from a repo card.

    Loads the card for ``repo_id``, drops each entry of ``tags`` that is
    present in the card's ``tags`` metadata, and pushes the card back only
    when its content actually changed. Any failure is printed and swallowed,
    so this function never raises.
    """
    try:
        card = RepoCard.load(repo_id, repo_type=repo_type, token=hf_token)
        content_before = card.content
        for unwanted in tags:
            if 'tags' not in card.data:
                continue
            if unwanted in card.data['tags']:
                card.data['tags'].remove(unwanted)
        # Skip the upload entirely when nothing was removed.
        if card.content != content_before:
            card.push_to_hub(repo_id=repo_id, repo_type=repo_type, token=hf_token)
    except Exception as e:
        print(f"Failed to remove tags from repocard. {e}")
|
33 |
+
|
34 |
+
def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, remove_tag, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
|
35 |
hf_token = oauth_token.token
|
36 |
api = HfApi(token=hf_token)
|
37 |
try:
|
|
|
54 |
subfolder = subfolder.removesuffix("/")
|
55 |
if auto_dir: subfolder = source_repo
|
56 |
|
57 |
+
if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token): raise gr.Error(f"Repo already exists {dst_repo}")
|
58 |
+
|
59 |
+
if overwrite or subfolder:
|
60 |
temp_dir = tempfile.mkdtemp()
|
61 |
api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
|
62 |
for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
|
|
|
82 |
|
83 |
repo_url = r.json().get("url")
|
84 |
|
85 |
+
if remove_tag: remove_repo_tags(dst_repo, ["not-for-all-audiences"], repo_type, hf_token)
|
86 |
+
|
87 |
return (
|
88 |
f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
|
89 |
"sp.jpg",
|
|
|
93 |
print(e)
|
94 |
raise gr.Error(f"Error occured: {e}")
|
95 |
|
96 |
+
def parse_repos(s):
    """Extract ``owner/name``-shaped tokens from free-form text.

    URLs (``http://`` / ``https://``) are stripped from ``s`` first, then every
    remaining ``<chars>/<chars>`` token is collected in order of appearance.
    Returns an empty list when nothing matches or when ``s`` cannot be
    processed (for example, it is not a string).
    """
    url_pattern = "https?://[\\w/:%#\\$&\\?\\(\\)~\\.=\\+\\-]+"
    repo_pattern = r'[^\w_\-\.]?([\w_\-\.]+/[\w_\-\.]+)[^\w_\-\.]?'
    try:
        text_without_urls = re.sub(url_pattern, "", s)
        return re.findall(repo_pattern, text_without_urls)
    except Exception:
        return []
|
104 |
+
|
105 |
+
def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
    """Copy the files of several source repos into a single destination repo.

    Each source repo found in ``source_repos_str`` (see ``parse_repos``) is
    downloaded file by file and re-uploaded to ``dst_repo`` under a subfolder
    named after the source repo, optionally nested below a prefix.

    Args:
        source_repos_str: Free text containing one or more ``owner/name`` ids.
        dst_repo: ``owner/name`` or ``owner/name/sub/folder``; the part after
            the repo id becomes the subfolder prefix.
        repo_type: One of ``REPO_TYPES``.
        private: Passed to ``create_repo`` for the destination.
        overwrite: When false, an already existing destination is an error.
        oauth_token: Token of the logged-in user, or ``None`` when logged out.
        progress: Not used in the body; presumably consumed by Gradio for
            tqdm progress tracking.

    Returns:
        A tuple of an HTML snippet linking to the destination repo and the
        image path ``"sp.jpg"``.

    Raises:
        gr.Error: If the user is not logged in, the repo type is invalid, or
            any step of the copy fails.
    """
    try:
        # The token is None when the user is not logged in; check inside the
        # guarded block so that case produces the login message below.
        if oauth_token is None:
            raise ValueError("no OAuth token available")
        if repo_type not in REPO_TYPES:
            raise ValueError("need to select valid repo type")
        hf_token = oauth_token.token
        _ = whoami(hf_token)
        # ^ this will throw if token is invalid
    except Exception as e:
        raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""") from e

    api = HfApi(token=hf_token)

    try:
        if re.fullmatch(REGEX_HF_REPO, dst_repo):
            subfolder_prefix = ""
        else:
            dst_repo, subfolder_prefix = re.findall(r'^([\w_\-\.]+/[\w_\-\.]+)/?(.+)?$', dst_repo)[0]
            # Normalise the prefix captured from dst_repo itself.
            subfolder_prefix = subfolder_prefix.removesuffix("/")
        if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token):
            raise gr.Error(f"Repo already exists {dst_repo}")

        source_repos = parse_repos(source_repos_str)
        if not source_repos:
            raise ValueError("no source repositories found in the input")

        # Creating the destination does not depend on the source, so do it once.
        api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)

        for source_repo in source_repos:
            if re.fullmatch(REGEX_HF_REPO, source_repo):
                target = ""
            else:
                source_repo, target = re.findall(r'^(?:http.+\.co/)?(?:datasets)?(?:spaces)?([\w_\-\.]+/[\w_\-\.]+)/?(?:blob/main/)?(?:resolve/main/)?(.+)?$', source_repo)[0]
                target = urllib.parse.unquote(target.removesuffix("/"))

            subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo

            # The context manager removes the scratch directory even when a
            # download or upload raises.
            with tempfile.TemporaryDirectory() as temp_dir:
                for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
                    if target and target not in path:
                        continue
                    file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
                    local_path = Path(file)
                    if not local_path.exists():
                        continue
                    path_in_repo = f"{subfolder}/{path}" if subfolder else path
                    if local_path.is_dir():  # unused for now
                        api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
                    elif local_path.is_file():
                        api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
                        # Delete each file right after upload to keep disk use low.
                        if local_path.exists():
                            local_path.unlink()

        if repo_type == "dataset":
            repo_url = f"https://huggingface.co/datasets/{dst_repo}"
        elif repo_type == "space":
            repo_url = f"https://huggingface.co/spaces/{dst_repo}"
        else:
            repo_url = f"https://huggingface.co/{dst_repo}"

        return (
            f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
            "sp.jpg",
        )

    except Exception as e:
        print(e)
        raise gr.Error(f"Error occured: {e}") from e
|
155 |
+
|
156 |
+
def duplicate_m2m(source_repos_str, hf_user, repo_type, private, overwrite, remove_tag, repo_prefix, repo_suffix, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
    """Duplicate several source repos, each into its own repo under ``hf_user``.

    For every repo id found in ``source_repos_str`` (see ``parse_repos``) that
    exists on the Hub, a destination id ``hf_user/<prefix><name><suffix>`` is
    built and the Hub "duplicate" endpoint is called for it. Sources that do
    not exist, and destinations that are not valid repo ids, are skipped
    silently; an existing destination is skipped with a ``gr.Info`` notice
    unless ``overwrite`` is set.

    Args:
        source_repos_str: Free text containing one or more ``owner/name`` ids.
        hf_user: Owner of the destination repos.
        repo_type: One of ``REPO_TYPES``.
        private: Sent as the ``private`` flag of each duplicate request.
        overwrite: When false, existing destinations are skipped.
        remove_tag: When true, the ``not-for-all-audiences`` tag is removed
            from each duplicated repo's card.
        repo_prefix: Text prepended to each source repo name.
        repo_suffix: Text appended to each source repo name.
        oauth_token: Token of the logged-in user, or ``None`` when logged out.
        progress: Not used in the body; presumably consumed by Gradio for
            tqdm progress tracking.

    Returns:
        A tuple of an HTML snippet with one link per duplicated repo and the
        image path ``"sp.jpg"``.

    Raises:
        gr.Error: If the user is not logged in, the repo type is invalid, or
            a duplicate request fails.
    """
    try:
        # The token is None when the user is not logged in; check inside the
        # guarded block so that case produces the login message below.
        if oauth_token is None:
            raise ValueError("no OAuth token available")
        if repo_type not in REPO_TYPES:
            raise ValueError("need to select valid repo type")
        hf_token = oauth_token.token
        _ = whoami(hf_token)
        # ^ this will throw if token is invalid
    except Exception as e:
        raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""") from e

    api = HfApi(token=hf_token)

    try:
        source_repos = parse_repos(source_repos_str)
        repo_url_result = 'Find your repo '
        for source_repo in source_repos:
            if not re.fullmatch(REGEX_HF_REPO, source_repo):
                continue
            if not api.repo_exists(repo_id=source_repo, repo_type=repo_type, token=hf_token):
                continue
            dst_repo = hf_user + "/" + repo_prefix + source_repo.split("/")[-1] + repo_suffix
            if not re.fullmatch(REGEX_HF_REPO, dst_repo):
                continue
            if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token):
                gr.Info(f"Repo already exists {dst_repo}")
                continue

            r = requests.post(
                f"{ENDPOINT}/api/{repo_type}s/{source_repo}/duplicate",
                headers=build_hf_headers(token=hf_token),
                json={"repository": dst_repo, "private": private},
            )
            hf_raise_for_status(r)

            repo_url = r.json().get("url")
            repo_url_result += f'<a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">{dst_repo}</a><br>\n'

            if remove_tag:
                remove_repo_tags(dst_repo, ["not-for-all-audiences"], repo_type, hf_token)

        return (
            repo_url_result,
            "sp.jpg",
        )

    except Exception as e:
        print(e)
        raise gr.Error(f"Error occured: {e}") from e
|
198 |
+
|
199 |
+
def add_repo_text(repo_id: str, source_repos: str):
    """Append ``repo_id`` as a new line to ``source_repos``.

    When ``source_repos`` is empty, ``repo_id`` is returned on its own so the
    result never starts with a blank line.
    """
    if not source_repos:
        return repo_id
    return "\n".join((source_repos, repo_id))
|
201 |
|
202 |
def swap_visibilty(profile: gr.OAuthProfile | None):
    """Return a component update that sets the logged-in or logged-out CSS class.

    A truthy ``profile`` selects ``main_ui_logged_in``; otherwise
    ``main_ui_logged_out`` is applied.
    """
    css_class = "main_ui_logged_in" if profile else "main_ui_logged_out"
    return gr.update(elem_classes=[css_class])
|
204 |
+
|
205 |
css = '''
|
206 |
.main_ui_logged_out{opacity: 0.3; pointer-events: none}
|
207 |
+
.title {text-align: center; align-items: center}
|
208 |
'''
|
209 |
with gr.Blocks(css=css) as demo:
|
210 |
gr.LoginButton()
|
211 |
with gr.Column(elem_classes="main_ui_logged_out") as main_ui:
|
212 |
+
gr.Markdown("# Duplicate your repo!", elem_classes="title")
|
213 |
+
gr.Markdown("Duplicate a Hugging Face repository! This Space is a an experimental demo.")
|
214 |
+
with gr.Tab("One to One"):
|
215 |
+
with gr.Row():
|
216 |
+
with gr.Column():
|
217 |
+
search = HuggingfaceHubSearch(
|
218 |
+
label="source_repo",
|
219 |
+
placeholder="Source repository (e.g. osanseviero/src)",
|
220 |
+
search_type=["model", "dataset", "space"],
|
221 |
+
sumbit_on_select=False,
|
222 |
+
)
|
223 |
+
with gr.Group():
|
224 |
+
dst_repo = gr.Textbox(label="dst_repo", placeholder="Destination repository (e.g. osanseviero/dst)", value=HF_REPO)
|
225 |
+
repo_type = gr.Dropdown(label="repo_type", choices=REPO_TYPES, value="model")
|
226 |
+
with gr.Row():
|
227 |
+
is_private = gr.Checkbox(label="Make new repo private?", value=True)
|
228 |
+
is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=True)
|
229 |
+
is_subdir = gr.Checkbox(label="Create subdirectories automatically?", value=True)
|
230 |
+
is_remtag = gr.Checkbox(label="Remove NFAA tag?", value=True)
|
231 |
+
with gr.Row():
|
232 |
+
submit_button = gr.Button("Submit", variant="primary")
|
233 |
+
clear_button = gr.Button("Clear", variant="secondary")
|
234 |
+
with gr.Column():
|
235 |
+
output_md = gr.Markdown(label="output")
|
236 |
+
output_image = gr.Image(show_label=False)
|
237 |
+
with gr.Tab("Multi to One"):
|
238 |
+
with gr.Row():
|
239 |
+
with gr.Column():
|
240 |
+
m2o_search = HuggingfaceHubSearch(
|
241 |
+
label="source_repo",
|
242 |
+
placeholder="Source repository (e.g. osanseviero/src)",
|
243 |
+
search_type=["model", "dataset", "space"],
|
244 |
+
sumbit_on_select=True,
|
245 |
+
)
|
246 |
+
m2o_source_repos = gr.Textbox(label="source_repos", placeholder="Source repositories (e.g. osanseviero/src)\n...", value="", lines=10)
|
247 |
+
with gr.Group():
|
248 |
+
m2o_dst_repo = gr.Textbox(label="dst_repo", placeholder="Destination repository (e.g. osanseviero/dst)", value=HF_REPO)
|
249 |
+
m2o_repo_type = gr.Dropdown(label="repo_type", choices=REPO_TYPES, value="model")
|
250 |
+
with gr.Row():
|
251 |
+
m2o_is_private = gr.Checkbox(label="Make new repo private?", value=True)
|
252 |
+
m2o_is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=True)
|
253 |
+
with gr.Row():
|
254 |
+
m2o_submit_button = gr.Button("Submit", variant="primary")
|
255 |
+
m2o_clear_button = gr.Button("Clear", variant="secondary")
|
256 |
+
with gr.Column():
|
257 |
+
m2o_output_md = gr.Markdown(label="output")
|
258 |
+
m2o_output_image = gr.Image(show_label=False)
|
259 |
+
with gr.Tab("Multi to Multi"):
|
260 |
+
with gr.Row():
|
261 |
+
with gr.Column():
|
262 |
+
m2m_search = HuggingfaceHubSearch(
|
263 |
+
label="source_repo",
|
264 |
+
placeholder="Source repository (e.g. osanseviero/src)",
|
265 |
+
search_type=["model", "dataset", "space"],
|
266 |
+
sumbit_on_select=True,
|
267 |
+
)
|
268 |
+
m2m_source_repos = gr.Textbox(label="source_repos", placeholder="Source repositories (e.g. osanseviero/src)\n...", value="", lines=10)
|
269 |
+
with gr.Group():
|
270 |
+
with gr.Row():
|
271 |
+
m2m_user = gr.Textbox(label="hf_user", placeholder="Your HF username", value=HF_USER)
|
272 |
+
m2m_prefix = gr.Textbox(label="repo_prefix", value=HF_REPO_PREFIX)
|
273 |
+
m2m_suffix = gr.Textbox(label="repo_suffix", value=HF_REPO_SUFFIX)
|
274 |
+
m2m_repo_type = gr.Dropdown(label="repo_type", choices=REPO_TYPES, value="model")
|
275 |
+
with gr.Row():
|
276 |
+
m2m_is_private = gr.Checkbox(label="Make new repo private?", value=True)
|
277 |
+
m2m_is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=False)
|
278 |
+
m2m_is_remtag = gr.Checkbox(label="Remove NFAA tag?", value=True)
|
279 |
+
with gr.Row():
|
280 |
+
m2m_submit_button = gr.Button("Submit", variant="primary")
|
281 |
+
m2m_clear_button = gr.Button("Clear", variant="secondary")
|
282 |
+
with gr.Column():
|
283 |
+
m2m_output_md = gr.Markdown(label="output")
|
284 |
+
m2m_output_image = gr.Image(show_label=False)
|
285 |
demo.load(fn=swap_visibilty, outputs=main_ui)
|
286 |
+
submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
|
287 |
+
clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
|
288 |
+
m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
|
289 |
+
m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
|
290 |
+
m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
|
291 |
+
[m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
|
292 |
+
m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
|
293 |
+
m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
|
294 |
+
[m2m_output_md, m2m_output_image])
|
295 |
+
m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
|
296 |
+
[m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
|
297 |
+
|
298 |
demo.queue()
|
299 |
demo.launch()
|