from __future__ import annotations from typing import List, TypedDict import huggingface_hub from huggingface_hub.hf_api import SpaceInfo from concurrent.futures import ThreadPoolExecutor import os import json import datetime import tqdm class SpaceData(TypedDict): id: str likes: int subdomain: str lastModified: str repo = huggingface_hub.Repository( local_dir="data", repo_type="dataset", clone_from="freddyaboulton/gradio-theme-subdomains", token=os.getenv("HF_TOKEN") ) repo.git_pull() api = huggingface_hub.HfApi(token=os.getenv("HF_TOKEN")) def get_theme_preview_spaces() -> List[SpaceInfo]: return list(iter(api.list_spaces(filter="gradio-theme"))) def get_info(space_name: SpaceInfo) -> SpaceData | None: if not space_name.id: print(f"no space_name for {space_name}") return None space_info = api.space_info(space_name.id, token=os.getenv("HF_TOKEN")) subdomain: str | None = getattr(space_info, "subdomain", None) if subdomain is None: print(f"no subdomain for {space_info.id}") return None return {"id": space_info.id, "likes": space_info.likes, "subdomain": f"https://{space_info.subdomain}.hf.space", "lastModified": space_info.lastModified} # type: ignore def get_all_info(spaces: List[SpaceInfo]) -> List[SpaceData]: with ThreadPoolExecutor(max_workers=10) as executor: all_info = list(tqdm.tqdm(executor.map(get_info, spaces), total=len(spaces))) return [info for info in all_info if info] def process_spaces(): theme_spaces = list(iter(get_theme_preview_spaces())) all_info = get_all_info(theme_spaces) json.dump(all_info, open("data/subdomains.json", "w")) repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")