John6666 committed on
Commit
269ab06
1 Parent(s): 23c1766

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -30
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  import tempfile
7
  import shutil
8
  import urllib
9
- from huggingface_hub import whoami, HfApi, hf_hub_download
10
  from huggingface_hub.utils import build_hf_headers, hf_raise_for_status
11
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
12
 
@@ -15,10 +15,23 @@ ENDPOINT = "https://huggingface.co"
15
 
16
  REPO_TYPES = ["model", "dataset", "space"]
17
  HF_REPO = os.environ.get("HF_REPO") if os.environ.get("HF_REPO") else "" # set your default repo
 
 
 
18
  REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
19
 
20
- def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
21
- print(oauth_token.token)
 
 
 
 
 
 
 
 
 
 
22
  hf_token = oauth_token.token
23
  api = HfApi(token=hf_token)
24
  try:
@@ -41,7 +54,9 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oa
41
  subfolder = subfolder.removesuffix("/")
42
  if auto_dir: subfolder = source_repo
43
 
44
- if overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token) or subfolder:
 
 
45
  temp_dir = tempfile.mkdtemp()
46
  api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
47
  for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
@@ -67,6 +82,8 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oa
67
 
68
  repo_url = r.json().get("url")
69
 
 
 
70
  return (
71
  f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
72
  "sp.jpg",
@@ -76,41 +93,207 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, oa
76
  print(e)
77
  raise gr.Error(f"Error occured: {e}")
78
 
79
- interface = gr.Interface(
80
- fn=duplicate,
81
- inputs=[
82
- HuggingfaceHubSearch(
83
- placeholder="Source repository (e.g. osanseviero/src)",
84
- search_type=["model", "dataset", "space"],
85
- sumbit_on_select=False,
86
- ),
87
- gr.Textbox(placeholder="Destination repository (e.g. osanseviero/dst)", value=HF_REPO),
88
- gr.Dropdown(choices=REPO_TYPES, value="model"),
89
- gr.Checkbox(label="Make new repo private?", value=True),
90
- gr.Checkbox(label="Overwrite existing repo?", value=True),
91
- gr.Checkbox(label="Create subdirectories automatically?", value=True),
92
- ],
93
- outputs=[
94
- gr.Markdown(label="output"),
95
- gr.Image(show_label=False),
96
- ],
97
- title="Duplicate your repo!",
98
- description="Duplicate a Hugging Face repository! This Space is a an experimental demo.",
99
- allow_flagging="never",
100
- live=False
101
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  def swap_visibilty(profile: gr.OAuthProfile | None):
104
  return gr.update(elem_classes=["main_ui_logged_in"]) if profile else gr.update(elem_classes=["main_ui_logged_out"])
105
-
106
  css = '''
107
  .main_ui_logged_out{opacity: 0.3; pointer-events: none}
 
108
  '''
109
  with gr.Blocks(css=css) as demo:
110
  gr.LoginButton()
111
  with gr.Column(elem_classes="main_ui_logged_out") as main_ui:
112
- interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  demo.load(fn=swap_visibilty, outputs=main_ui)
114
-
 
 
 
 
 
 
 
 
 
 
 
115
  demo.queue()
116
  demo.launch()
 
6
  import tempfile
7
  import shutil
8
  import urllib
9
+ from huggingface_hub import whoami, HfApi, hf_hub_download, RepoCard
10
  from huggingface_hub.utils import build_hf_headers, hf_raise_for_status
11
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
12
 
 
15
 
16
  REPO_TYPES = ["model", "dataset", "space"]
17
  HF_REPO = os.environ.get("HF_REPO") if os.environ.get("HF_REPO") else "" # set your default repo
18
+ HF_REPO_PREFIX = os.environ.get("HF_REPO_PREFIX") if os.environ.get("HF_REPO_PREFIX") else "" # set your default repo prefix
19
+ HF_REPO_SUFFIX = os.environ.get("HF_REPO_SUFFIX") if os.environ.get("HF_REPO_SUFFIX") else "" # set your default repo suffix
20
+ HF_USER = os.environ.get("HF_USER") if os.environ.get("HF_USER") else "" # set your username
21
  REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
22
 
23
def remove_repo_tags(repo_id: str, tags: list[str], repo_type: str, hf_token: str):
    """Strip the given tags from a repo's card and push the card if it changed.

    The card is loaded with ``RepoCard.load``; every entry of ``tags`` that is
    present in the card's ``tags`` metadata is removed. The card is pushed back
    only when its content differs from what was loaded.

    This is best-effort: any exception is printed and swallowed, and the
    function always returns ``None``.
    """
    try:
        card = RepoCard.load(repo_id, repo_type=repo_type, token=hf_token)
        content_before = card.content
        for unwanted in tags:
            if 'tags' not in card.data:
                continue
            if unwanted in card.data['tags']:
                card.data['tags'].remove(unwanted)
        # Skip the upload when nothing was actually removed.
        if card.content != content_before:
            card.push_to_hub(repo_id=repo_id, repo_type=repo_type, token=hf_token)
    except Exception as e:
        print(f"Failed to remove tags from repocard. {e}")
33
+
34
+ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, remove_tag, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
35
  hf_token = oauth_token.token
36
  api = HfApi(token=hf_token)
37
  try:
 
54
  subfolder = subfolder.removesuffix("/")
55
  if auto_dir: subfolder = source_repo
56
 
57
+ if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token): raise gr.Error(f"Repo already exists {dst_repo}")
58
+
59
+ if overwrite or subfolder:
60
  temp_dir = tempfile.mkdtemp()
61
  api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
62
  for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
 
82
 
83
  repo_url = r.json().get("url")
84
 
85
+ if remove_tag: remove_repo_tags(dst_repo, ["not-for-all-audiences"], repo_type, hf_token)
86
+
87
  return (
88
  f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
89
  "sp.jpg",
 
93
  print(e)
94
  raise gr.Error(f"Error occured: {e}")
95
 
96
def parse_repos(s):
    """Extract ``owner/name`` repository ids from free-form text.

    ``http://`` / ``https://`` URLs are removed first; every remaining
    ``owner/name`` token (word characters, ``-`` and ``.`` on each side of a
    single ``/``) is then returned in order of appearance. Duplicates are kept.

    Args:
        s: Text containing repo ids separated by arbitrary other characters
            (newlines, spaces, commas, ...).

    Returns:
        A list of repo id strings; ``[]`` when ``s`` is not a string or
        contains no repo id.
    """
    # Non-string input (e.g. None from an empty widget) yields no repos.
    if not isinstance(s, str):
        return []
    url_pattern = r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+'
    repo_pattern = r'[^\w_\-\.]?([\w_\-\.]+/[\w_\-\.]+)[^\w_\-\.]?'
    without_urls = re.sub(url_pattern, "", s)
    return re.findall(repo_pattern, without_urls)
104
+
105
def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
    """Copy the files of several source repos into one destination repo.

    Each source repo parsed from ``source_repos_str`` is downloaded file by
    file into a temporary directory and uploaded to ``dst_repo`` under
    ``[<sub-path>/]<source owner>/<source name>/``, where ``<sub-path>`` is
    whatever follows ``owner/name`` in ``dst_repo``.

    Args:
        source_repos_str: Free-form text listing the source repo ids.
        dst_repo: ``owner/name`` of the destination, optionally followed by
            ``/sub/path``.
        repo_type: One of ``REPO_TYPES``.
        private: Passed to ``create_repo`` for the destination.
        overwrite: When false, fail if the destination already exists.
        oauth_token: Token of the logged-in user.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        A ``(html_link, image_path)`` tuple for the output widgets.

    Raises:
        gr.Error: On an invalid repo type / token, or on any failure while
            copying (the underlying error text is included in the message).
    """
    hf_token = oauth_token.token
    api = HfApi(token=hf_token)
    try:
        if not repo_type in REPO_TYPES:
            raise ValueError("need to select valid repo type")
        _ = whoami(oauth_token.token)
        # ^ this will throw if token is invalid
    except Exception as e:
        raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")

    try:
        if re.fullmatch(REGEX_HF_REPO, dst_repo):
            subfolder_prefix = ""
        else:
            dst_repo, subfolder_prefix = re.findall(r'^([\w_\-\.]+/[\w_\-\.]+)/?(.+)?$', dst_repo)[0]
            # Fixed: this previously read the not-yet-assigned local `subfolder`,
            # which raised UnboundLocalError for every destination with a sub-path.
            subfolder_prefix = subfolder_prefix.removesuffix("/")
        if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token):
            raise gr.Error(f"Repo already exists {dst_repo}")

        source_repos = parse_repos(source_repos_str)
        if not source_repos:
            # Without this the function failed later on an unbound `repo_url`.
            raise ValueError("no source repositories found")

        # The result link depends only on the destination, so build it once.
        if repo_type == "dataset":
            repo_url = f"https://huggingface.co/datasets/{dst_repo}"
        elif repo_type == "space":
            repo_url = f"https://huggingface.co/spaces/{dst_repo}"
        else:
            repo_url = f"https://huggingface.co/{dst_repo}"

        for source_repo in source_repos:
            if re.fullmatch(REGEX_HF_REPO, source_repo):
                target = ""
            else:
                # URL / sub-path form: split off the repo id and keep the rest as a path filter.
                source_repo, target = re.findall(r'^(?:http.+\.co/)?(?:datasets)?(?:spaces)?([\w_\-\.]+/[\w_\-\.]+)/?(?:blob/main/)?(?:resolve/main/)?(.+)?$', source_repo)[0]
                target = urllib.parse.unquote(target.removesuffix("/"))

            subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo

            temp_dir = tempfile.mkdtemp()
            try:
                api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
                for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
                    if target and target not in path:
                        continue
                    file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
                    if not Path(file).exists():
                        continue
                    if Path(file).is_dir():  # unused for now
                        api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=f"{subfolder}/{path}" if subfolder else path, repo_type=repo_type, token=hf_token)
                    elif Path(file).is_file():
                        api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=f"{subfolder}/{path}" if subfolder else path, repo_type=repo_type, token=hf_token)
                    # Delete each file right after upload to keep disk usage low.
                    if Path(file).exists():
                        Path(file).unlink()
            finally:
                # Always remove the scratch directory, even when a transfer fails.
                shutil.rmtree(temp_dir, ignore_errors=True)

        return (
            f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
            "sp.jpg",
        )

    except Exception as e:
        print(e)
        raise gr.Error(f"Error occured: {e}")
155
+
156
def duplicate_m2m(source_repos_str, hf_user, repo_type, private, overwrite, remove_tag, repo_prefix, repo_suffix, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
    """Duplicate every listed source repo into its own repo under ``hf_user``.

    For each repo id parsed from ``source_repos_str`` the destination id is
    ``hf_user/<repo_prefix><source name><repo_suffix>``. Sources that are not
    plain ``owner/name`` ids or do not exist, and destinations that are not
    valid ids, are skipped silently. An existing destination is skipped with
    a ``gr.Info`` notice unless ``overwrite`` is set. Duplication is done by
    POSTing to ``{ENDPOINT}/api/{repo_type}s/{source_repo}/duplicate``; when
    ``remove_tag`` is set the "not-for-all-audiences" tag is then stripped
    from the new repo's card.

    Returns:
        A ``(html_links, image_path)`` tuple for the output widgets.

    Raises:
        gr.Error: On an invalid repo type / token, or on any failure while
            duplicating.
    """
    hf_token = oauth_token.token
    api = HfApi(token=hf_token)
    try:
        if repo_type not in REPO_TYPES:
            raise ValueError("need to select valid repo type")
        # whoami() raises when the token is invalid
        whoami(oauth_token.token)
    except Exception as e:
        raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")

    try:
        links = []
        for source_repo in parse_repos(source_repos_str):
            if not re.fullmatch(REGEX_HF_REPO, source_repo):
                continue
            if not api.repo_exists(repo_id=source_repo, repo_type=repo_type, token=hf_token):
                continue
            dst_repo = hf_user + "/" + repo_prefix + source_repo.split("/")[-1] + repo_suffix
            if not re.fullmatch(REGEX_HF_REPO, dst_repo):
                continue
            if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token):
                gr.Info(f"Repo already exists {dst_repo}")
                continue

            response = requests.post(
                f"{ENDPOINT}/api/{repo_type}s/{source_repo}/duplicate",
                headers=build_hf_headers(token=oauth_token.token),
                json={"repository": dst_repo, "private": private},
            )
            hf_raise_for_status(response)

            repo_url = response.json().get("url")
            links.append(f'<a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">{dst_repo}</a><br>\n')

            if remove_tag:
                remove_repo_tags(dst_repo, ["not-for-all-audiences"], repo_type, hf_token)

        return (
            'Find your repo ' + "".join(links),
            "sp.jpg",
        )

    except Exception as e:
        print(e)
        raise gr.Error(f"Error occured: {e}")
198
+
199
def add_repo_text(repo_id: str, source_repos: str):
    """Append ``repo_id`` to the newline-separated ``source_repos`` text.

    Returns ``repo_id`` alone when ``source_repos`` is empty.
    """
    if not source_repos:
        return repo_id
    return source_repos + "\n" + repo_id
201
 
202
  def swap_visibilty(profile: gr.OAuthProfile | None):
203
  return gr.update(elem_classes=["main_ui_logged_in"]) if profile else gr.update(elem_classes=["main_ui_logged_out"])
204
+
205
  # Stylesheet for the app: the logged-out class dims the main column and blocks
  # pointer input; the "title" class centers its element.
  css = '''
  .main_ui_logged_out{opacity: 0.3; pointer-events: none}
  .title {text-align: center; align-items: center}
  '''
  # NOTE(review): the layout nesting below was reconstructed from a flattened
  # diff view in which indentation was lost — confirm against the real file.
  with gr.Blocks(css=css) as demo:
      gr.LoginButton()
      # The whole UI starts with the logged-out class; swap_visibilty (wired to
      # demo.load below) replaces it according to the login state.
      with gr.Column(elem_classes="main_ui_logged_out") as main_ui:
          gr.Markdown("# Duplicate your repo!", elem_classes="title")
          gr.Markdown("Duplicate a Hugging Face repository! This Space is a an experimental demo.")
          # Tab 1: one source repo -> one destination repo (handled by `duplicate`).
          with gr.Tab("One to One"):
              with gr.Row():
                  with gr.Column():
                      search = HuggingfaceHubSearch(
                          label="source_repo",
                          placeholder="Source repository (e.g. osanseviero/src)",
                          search_type=["model", "dataset", "space"],
                          sumbit_on_select=False,
                      )
                      with gr.Group():
                          dst_repo = gr.Textbox(label="dst_repo", placeholder="Destination repository (e.g. osanseviero/dst)", value=HF_REPO)
                          repo_type = gr.Dropdown(label="repo_type", choices=REPO_TYPES, value="model")
                          with gr.Row():
                              is_private = gr.Checkbox(label="Make new repo private?", value=True)
                              is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=True)
                              is_subdir = gr.Checkbox(label="Create subdirectories automatically?", value=True)
                              is_remtag = gr.Checkbox(label="Remove NFAA tag?", value=True)
                      with gr.Row():
                          submit_button = gr.Button("Submit", variant="primary")
                          clear_button = gr.Button("Clear", variant="secondary")
                  with gr.Column():
                      output_md = gr.Markdown(label="output")
                      output_image = gr.Image(show_label=False)
          # Tab 2: several source repos -> one destination repo (handled by `duplicate_m2o`).
          with gr.Tab("Multi to One"):
              with gr.Row():
                  with gr.Column():
                      # Submitting the search box appends the repo to the text area
                      # below (see the m2o_search.submit wiring).
                      m2o_search = HuggingfaceHubSearch(
                          label="source_repo",
                          placeholder="Source repository (e.g. osanseviero/src)",
                          search_type=["model", "dataset", "space"],
                          sumbit_on_select=True,
                      )
                      m2o_source_repos = gr.Textbox(label="source_repos", placeholder="Source repositories (e.g. osanseviero/src)\n...", value="", lines=10)
                      with gr.Group():
                          m2o_dst_repo = gr.Textbox(label="dst_repo", placeholder="Destination repository (e.g. osanseviero/dst)", value=HF_REPO)
                          m2o_repo_type = gr.Dropdown(label="repo_type", choices=REPO_TYPES, value="model")
                          with gr.Row():
                              m2o_is_private = gr.Checkbox(label="Make new repo private?", value=True)
                              m2o_is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=True)
                      with gr.Row():
                          m2o_submit_button = gr.Button("Submit", variant="primary")
                          m2o_clear_button = gr.Button("Clear", variant="secondary")
                  with gr.Column():
                      m2o_output_md = gr.Markdown(label="output")
                      m2o_output_image = gr.Image(show_label=False)
          # Tab 3: several source repos -> one new repo each (handled by `duplicate_m2m`).
          with gr.Tab("Multi to Multi"):
              with gr.Row():
                  with gr.Column():
                      m2m_search = HuggingfaceHubSearch(
                          label="source_repo",
                          placeholder="Source repository (e.g. osanseviero/src)",
                          search_type=["model", "dataset", "space"],
                          sumbit_on_select=True,
                      )
                      m2m_source_repos = gr.Textbox(label="source_repos", placeholder="Source repositories (e.g. osanseviero/src)\n...", value="", lines=10)
                      with gr.Group():
                          with gr.Row():
                              m2m_user = gr.Textbox(label="hf_user", placeholder="Your HF username", value=HF_USER)
                              m2m_prefix = gr.Textbox(label="repo_prefix", value=HF_REPO_PREFIX)
                              m2m_suffix = gr.Textbox(label="repo_suffix", value=HF_REPO_SUFFIX)
                          m2m_repo_type = gr.Dropdown(label="repo_type", choices=REPO_TYPES, value="model")
                          with gr.Row():
                              m2m_is_private = gr.Checkbox(label="Make new repo private?", value=True)
                              m2m_is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=False)
                              m2m_is_remtag = gr.Checkbox(label="Remove NFAA tag?", value=True)
                      with gr.Row():
                          m2m_submit_button = gr.Button("Submit", variant="primary")
                          m2m_clear_button = gr.Button("Clear", variant="secondary")
                  with gr.Column():
                      m2m_output_md = gr.Markdown(label="output")
                      m2m_output_image = gr.Image(show_label=False)
      # Event wiring. The handlers' oauth_token / progress parameters are not
      # listed in the inputs — presumably Gradio supplies them itself; verify.
      demo.load(fn=swap_visibilty, outputs=main_ui)
      submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
      # Each "Clear" handler returns one default value per listed output component, in order.
      clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
      m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
      m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
      m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
          [m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
      m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
      m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
          [m2m_output_md, m2m_output_image])
      m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
          [m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)

  demo.queue()
  demo.launch()