# Source: theme-gallery / processing.py
# Commit 69f3076 ("Modify backend", freddyaboulton) - 1.83 kB
from __future__ import annotations
from typing import List, TypedDict
import huggingface_hub
from huggingface_hub.hf_api import SpaceInfo
from concurrent.futures import ThreadPoolExecutor
import os
import json
import datetime
import tqdm
class SpaceData(TypedDict):
    """Summary record describing a single theme Space."""

    # Repository id of the Space.
    id: str
    # Like count reported for the Space.
    likes: int
    # Full URL of the form ``https://<subdomain>.hf.space``.
    subdomain: str
    # Last-modified value copied from the Hub's space info.
    lastModified: str
# Hub access token read from the environment (None when HF_TOKEN is unset).
_hf_token = os.getenv("HF_TOKEN")

# Working copy, kept under ./data, of the dataset repo that holds the output.
repo = huggingface_hub.Repository(
    local_dir="data",
    repo_type="dataset",
    clone_from="freddyaboulton/gradio-theme-subdomains",
    token=_hf_token,
)
# Pull first so the working copy is current before anything is written to it.
repo.git_pull()

# Hub API client shared by the functions in this module.
api = huggingface_hub.HfApi(token=_hf_token)
def get_theme_preview_spaces() -> List[SpaceInfo]:
    """Return every Space the Hub lists under the ``gradio-theme`` filter."""
    matching_spaces = api.list_spaces(filter="gradio-theme")
    # Materialize the listing so callers receive a concrete list.
    return list(matching_spaces)
def get_info(space_name: SpaceInfo) -> SpaceData | None:
    """Look up one Space and reduce its info to a ``SpaceData`` record.

    Args:
        space_name: Listing entry for the Space. Despite the parameter name
            this is a ``SpaceInfo`` object, not a string; only its ``id``
            is used for the lookup.

    Returns:
        A dict with the Space's id, like count, ``https://<subdomain>.hf.space``
        URL and last-modified value, or ``None`` (after printing a message)
        when the entry has no id or the fetched info has no subdomain.
    """
    space_id = space_name.id
    if not space_id:
        print(f"no space_name for {space_name}")
        return None

    space_info = api.space_info(space_id, token=os.getenv("HF_TOKEN"))

    # Not every info object carries a subdomain attribute, hence getattr.
    subdomain: str | None = getattr(space_info, "subdomain", None)
    if subdomain is None:
        print(f"no subdomain for {space_info.id}")
        return None

    record = {
        "id": space_info.id,
        "likes": space_info.likes,
        "subdomain": f"https://{subdomain}.hf.space",
        "lastModified": space_info.lastModified,
    }
    return record  # type: ignore
def get_all_info(spaces: List[SpaceInfo]) -> List[SpaceData]:
    """Run ``get_info`` over *spaces* and keep only the successful records.

    Lookups are spread across a pool of 10 worker threads and wrapped in a
    tqdm progress bar sized to ``len(spaces)``. Falsy results (the ``None``
    that ``get_info`` returns for a Space it could not describe) are dropped.

    Args:
        spaces: Listing entries to look up.

    Returns:
        The non-empty ``get_info`` results, in input order.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        lookups = pool.map(get_info, spaces)
        progress = tqdm.tqdm(lookups, total=len(spaces))
        # Drain the iterator inside the ``with`` so the bar advances as
        # each lookup completes.
        results = list(progress)
    return [record for record in results if record]
def process_spaces():
    """Refresh ``data/subdomains.json`` and push it to the Hub.

    Lists the theme Spaces, gathers a record for each, writes the records
    as JSON into the ``data`` working copy, and starts a non-blocking push
    whose commit message carries the current local timestamp.
    """
    theme_spaces = get_theme_preview_spaces()
    all_info = get_all_info(theme_spaces)

    # Close (and therefore flush) the file before pushing, so the commit is
    # guaranteed to see the complete JSON document rather than relying on
    # the garbage collector to close a dangling file handle.
    with open("data/subdomains.json", "w", encoding="utf-8") as output_file:
        json.dump(all_info, output_file)

    repo.push_to_hub(
        blocking=False,
        commit_message=f"Updating data at {datetime.datetime.now()}",
    )