import copy
import datetime
import html
import os
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

repo_dir = pathlib.Path(__file__).parent
HF_TOKEN = os.getenv('HF_TOKEN')


class DemoList:
    """Table of demo Spaces built from ``list.yaml`` plus Hub metadata.

    ``list.yaml`` (next to this file) maps a Space URL to a dict of
    hand-written fields; the code reads ``created`` (required) and the
    optional ``arxiv``, ``github`` and ``tags`` lists. ``load_data`` adds
    fields fetched through ``HfApi.space_info`` and ``to_df`` turns the
    result into a display table.
    """

    # Column order of the DataFrame returned by ``to_df``.
    _COLUMNS = (
        'title',
        'arxiv',
        'github',
        'likes',
        'tags',
        'last_modified',
        'created',
        'sdk',
        'sdk_version',
        'status',
        'hardware',
    )
    # ``%f`` accepts 1-6 fractional digits, so the usual ``.000Z`` suffix
    # still parses and a non-zero fraction no longer raises ValueError.
    _HUB_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
    _CREATED_FORMAT = '%Y-%m-%d-%H-%M-%S'
    _DISPLAY_FORMAT = '%Y/%m/%d %H:%M:%S'

    def __init__(self):
        self.api = HfApi(token=HF_TOKEN)
        self.update_data()

    def update_data(self) -> None:
        """Reload ``list.yaml``, refetch Hub metadata and rebuild ``df``."""
        self.data = self.load_data()
        self.df = self.to_df()

    def load_data(self) -> dict:
        """Read ``list.yaml`` and add Hub metadata to every entry.

        Returns:
            Dict keyed by Space URL. Each value is the YAML entry extended
            with ``title``, ``sdk``, ``sdk_version``, ``likes``,
            ``last_modified``, ``status`` and ``hardware``; ``created`` is
            rewritten into the display format. An empty YAML file yields
            an empty dict.

        Raises:
            ValueError: If ``created`` or the Hub timestamp does not match
                the expected format.
        """
        with open(repo_dir / 'list.yaml', encoding='utf-8') as f:
            # safe_load returns None for an empty document.
            data = yaml.safe_load(f) or {}

        for url, info in tqdm.auto.tqdm(data.items()):
            space_info = self.api.space_info(
                repo_id=self.get_space_id(url))
            card = space_info.cardData

            info['title'] = card['title']
            info['sdk'] = card['sdk']
            info['sdk_version'] = card.get('sdk_version', '')
            info['likes'] = space_info.likes

            last_modified = datetime.datetime.strptime(
                space_info.lastModified, self._HUB_TIME_FORMAT)
            info['last_modified'] = last_modified.strftime(
                self._DISPLAY_FORMAT)

            created = datetime.datetime.strptime(info['created'],
                                                 self._CREATED_FORMAT)
            info['created'] = created.strftime(self._DISPLAY_FORMAT)

            info['status'] = space_info.runtime['stage']
            info['hardware'] = space_info.runtime['hardware']['current']
        return data

    @staticmethod
    def get_space_id(url: str) -> str:
        """Return the last two ``/``-separated components of ``url``."""
        return '/'.join(url.split('/')[-2:])

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an HTML anchor showing ``text`` and pointing at ``url``.

        Both values are HTML-escaped so characters such as ``&``, ``<`` or
        ``"`` cannot break the markup.
        """
        href = html.escape(url, quote=True)
        label = html.escape(text)
        return f'<a href="{href}" target="_blank">{label}</a>'

    def get_arxiv_link(self, url: str) -> str:
        """Return newline-joined links for the entry's ``arxiv`` URLs.

        Each link is labelled with the last path component of its URL.
        """
        # ``or []`` also covers a key that is present but null in the YAML.
        links = sorted(self.data[url].get('arxiv') or [])
        return '\n'.join(
            self.create_link(link.split('/')[-1], link) for link in links)

    def get_github_link(self, url: str) -> str:
        """Return newline-joined ``github`` links for the entry."""
        links = sorted(self.data[url].get('github') or [])
        return '\n'.join(self.create_link('github', link) for link in links)

    def get_tag_list(self, url: str) -> str:
        """Return the entry's tags, sorted and joined with ``', '``."""
        return ', '.join(sorted(self.data[url].get('tags') or []))

    def to_df(self) -> pd.DataFrame:
        """Build the display table from ``self.data``.

        Returns:
            DataFrame indexed by Space URL with the columns listed in
            ``_COLUMNS``. ``title``, ``arxiv`` and ``github`` hold HTML
            links and ``tags`` a comma-separated string. With no entries,
            an empty frame with the same columns is returned.
        """
        columns = list(self._COLUMNS)
        if not self.data:
            # Selecting columns from an empty frame would raise KeyError.
            return pd.DataFrame(columns=columns)

        # Work on a copy so the raw values in self.data stay untouched.
        data = copy.deepcopy(self.data)
        for url, info in data.items():
            info['title'] = self.create_link(info['title'], url)
            info['arxiv'] = self.get_arxiv_link(url)
            info['github'] = self.get_github_link(url)
            info['tags'] = self.get_tag_list(url)
        return pd.DataFrame(data).T.loc[:, columns]