# Spaces: Sleeping
from __future__ import annotations | |
from typing import List, TypedDict | |
import huggingface_hub | |
from huggingface_hub.hf_api import SpaceInfo | |
from concurrent.futures import ThreadPoolExecutor | |
import os | |
import json | |
import datetime | |
import tqdm | |
class SpaceData(TypedDict):
    """Summary record for one theme Space, as serialized to the JSON output."""

    # Identifier of the Space, copied from the Hub's space info.
    id: str
    # Like count reported by the Hub for the Space.
    likes: int
    # Full URL of the running Space: "https://<subdomain>.hf.space".
    subdomain: str
    # Last-modified value reported by the Hub for the Space.
    lastModified: str
# Hub credential, read once from the environment (None when HF_TOKEN is unset).
_HF_TOKEN = os.getenv("HF_TOKEN")

# Client used for the Hub API queries made by the functions in this module.
api = huggingface_hub.HfApi(token=_HF_TOKEN)

# Local clone (in ./data) of the dataset repo that receives the generated
# file; it is pulled immediately, i.e. at import time.
repo = huggingface_hub.Repository(
    clone_from="freddyaboulton/gradio-theme-subdomains",
    repo_type="dataset",
    local_dir="data",
    token=_HF_TOKEN,
)
repo.git_pull()
def get_theme_preview_spaces() -> List[SpaceInfo]:
    """Return every Space the Hub lists under the ``gradio-theme`` filter.

    The listing returned by the API client is fully consumed into a list
    before returning.
    """
    themed_spaces = api.list_spaces(filter="gradio-theme")
    return list(themed_spaces)
def get_info(space_name: SpaceInfo) -> SpaceData | None:
    """Fetch the details of one Space and reduce them to a ``SpaceData`` record.

    Args:
        space_name: Listing entry for the Space. Despite the parameter name
            this is a ``SpaceInfo`` object; only its ``id`` is used, to look
            up the full details through the API client.

    Returns:
        A dict holding the Space's id, like count, its
        ``https://<subdomain>.hf.space`` URL and its last-modified value, or
        ``None`` (after printing the reason) when the entry has no id or the
        Space has no usable subdomain.
    """
    if not space_name.id:
        print(f"no space_name for {space_name}")
        return None

    space_info = api.space_info(space_name.id, token=os.getenv("HF_TOKEN"))

    subdomain: str | None = getattr(space_info, "subdomain", None)
    # An empty subdomain would produce the unusable URL "https://.hf.space",
    # so it is rejected together with a missing one.
    if not subdomain:
        print(f"no subdomain for {space_info.id}")
        return None

    last_modified = space_info.lastModified
    # NOTE(review): depending on the installed huggingface_hub version this
    # value may be a datetime rather than a string (confirm against the
    # pinned version). SpaceData declares a str and the record is later
    # JSON-encoded, so a datetime is normalised to ISO-8601 text here.
    if isinstance(last_modified, datetime.datetime):
        last_modified = last_modified.isoformat()

    return {
        "id": space_info.id,
        "likes": space_info.likes,
        # Built from the validated local rather than re-reading the attribute.
        "subdomain": f"https://{subdomain}.hf.space",
        "lastModified": last_modified,
    }  # type: ignore
def get_all_info(spaces: List[SpaceInfo]) -> List[SpaceData]:
    """Collect ``SpaceData`` records for ``spaces`` using a thread pool.

    ``get_info`` is applied to every entry on up to 10 worker threads while a
    tqdm progress bar tracks completion. Entries for which ``get_info``
    returned nothing are left out of the result.

    Args:
        spaces: Listing entries to look up.

    Returns:
        The non-empty records, in the same order as ``spaces``.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = pool.map(get_info, spaces)
        progress = tqdm.tqdm(pending, total=len(spaces))
        # Draining the progress bar waits for each lookup in turn.
        fetched = list(progress)
    return [record for record in fetched if record]
def process_spaces():
    """Regenerate ``data/subdomains.json`` and push it to the Hub.

    Lists the theme Spaces, gathers a record for each one, overwrites the
    JSON file inside the local repo clone, and then starts a non-blocking
    push whose commit message carries the current local timestamp.
    """
    # get_theme_preview_spaces() already returns a list; no extra copy needed.
    theme_spaces = get_theme_preview_spaces()
    all_info = get_all_info(theme_spaces)

    # Close (and thereby flush) the file deterministically before the push
    # starts, instead of relying on garbage collection of an unnamed handle.
    with open("data/subdomains.json", "w") as out_file:
        json.dump(all_info, out_file)

    repo.push_to_hub(
        blocking=False,
        commit_message=f"Updating data at {datetime.datetime.now()}",
    )