import copy
import datetime
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

repo_dir = pathlib.Path(__file__).parent


class DemoList:
    """Table of Hugging Face Space demos described in ``list.yaml``.

    ``list.yaml`` (located next to this module) is read as a mapping from
    Space URL to a dict of hand-written metadata.  Each entry is enriched
    with information fetched through ``HfApi.space_info`` and the result is
    exposed both as a dict (``self.data``) and as a ``pandas.DataFrame``
    (``self.df``) whose columns follow ``COLUMN_INFO``.
    """

    # Pairs of [column name, column datatype], in display order.
    COLUMN_INFO = [
        ['status', 'markdown'],
        ['hardware', 'markdown'],
        ['title', 'markdown'],
        ['arxiv', 'markdown'],
        ['github', 'markdown'],
        ['likes', 'number'],
        ['tags', 'str'],
        ['last_modified', 'str'],
        ['created', 'str'],
        ['sdk', 'markdown'],
        ['sdk_version', 'str'],
        ['suggested_hardware', 'markdown'],
    ]

    # Format used for every timestamp written into the table.
    _DISPLAY_TIME_FORMAT = '%Y/%m/%d %H:%M:%S'

    def __init__(self):
        self.api = HfApi()
        self.data = self.load_data()
        self.df = self.to_df()

    def update_data(self) -> None:
        """Reload ``list.yaml``, refetch the Space info and rebuild ``df``."""
        self.data = self.load_data()
        self.df = self.to_df()

    def load_data(self) -> dict:
        """Read ``list.yaml`` and enrich each entry with live Space info.

        Returns:
            A dict keyed by Space URL.  Every value is the YAML entry
            extended with ``title``, ``sdk``, ``sdk_version``, ``likes``,
            ``last_modified``, ``created`` (reformatted), ``status``,
            ``suggested_hardware`` and ``hardware``.  An empty YAML file
            yields an empty dict.
        """
        with open(repo_dir / 'list.yaml', encoding='utf-8') as f:
            # safe_load returns None for an empty document.
            data = yaml.safe_load(f) or {}

        # NOTE(review): the attribute accesses below assume that
        # `lastModified` is a string ending in '.000Z' and that `runtime`
        # is subscriptable like a dict - confirm against the installed
        # huggingface_hub version.
        for url in tqdm.auto.tqdm(list(data)):
            space_id = self.get_space_id(url)
            space_info = self.api.space_info(repo_id=space_id)
            card = space_info.cardData
            info = data[url]

            info['title'] = card['title']
            info['sdk'] = self.to_div(card['sdk'], category_name='sdk')
            info['sdk_version'] = card.get('sdk_version', '')
            info['likes'] = space_info.likes

            last_modified = datetime.datetime.strptime(
                space_info.lastModified, '%Y-%m-%dT%H:%M:%S.000Z')
            info['last_modified'] = last_modified.strftime(
                self._DISPLAY_TIME_FORMAT)

            created = datetime.datetime.strptime(info['created'],
                                                 '%Y-%m-%d-%H-%M-%S')
            info['created'] = created.strftime(self._DISPLAY_TIME_FORMAT)

            info['status'] = self.to_div(space_info.runtime['stage'],
                                         category_name='status')
            info['suggested_hardware'] = self.to_div(
                card.get('suggested_hardware', ''), category_name='hardware')
            info['hardware'] = self.to_div(
                space_info.runtime['hardware']['current'],
                category_name='hardware')
        return data

    @staticmethod
    def get_space_id(url: str) -> str:
        """Return the last two path components of *url* joined by ``/``.

        Trailing slashes are ignored so that ``.../user/name/`` and
        ``.../user/name`` give the same id.
        """
        return '/'.join(url.rstrip('/').split('/')[-2:])

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an HTML anchor that shows *text* and points to *url*.

        NOTE(review): the reviewed source returned only ``{text}`` and left
        ``url`` unused, so the anchor markup appears to have been stripped.
        It is restored here; the ``target="_blank"`` attribute is a choice
        made in this fix - confirm it matches the intended markup.
        """
        return f'<a href="{url}" target="_blank">{text}</a>'

    def get_arxiv_link(self, url: str) -> str:
        """Return newline-separated links for the entry's ``arxiv`` URLs.

        Each link is labelled with the last path component of its URL.
        """
        links = sorted(self.data[url].get('arxiv', []))
        links = [self.create_link(link.split('/')[-1], link) for link in links]
        return '\n'.join(links)

    def get_github_link(self, url: str) -> str:
        """Return newline-separated ``github`` links for the entry."""
        links = sorted(self.data[url].get('github', []))
        links = [self.create_link('github', link) for link in links]
        return '\n'.join(links)

    def get_tag_list(self, url: str) -> str:
        """Return the entry's tags, sorted and joined by ``', '``."""
        tags = sorted(self.data[url].get('tags', []))
        return ', '.join(tags)

    @property
    def column_names(self) -> list[str]:
        """Column names, in the order given by ``COLUMN_INFO``."""
        return list(map(operator.itemgetter(0), self.COLUMN_INFO))

    @property
    def column_datatype(self) -> list[str]:
        """Column datatypes, in the order given by ``COLUMN_INFO``."""
        return list(map(operator.itemgetter(1), self.COLUMN_INFO))

    def to_div(self, text: str | None, category_name: str) -> str:
        """Wrap *text* in a ``<div>`` classed ``<category_name>-<text>``.

        ``None`` is treated as an empty string; the class name uses the
        lower-cased text.

        NOTE(review): in the reviewed source the literal was broken across
        lines with its tags missing and ``class_name`` unused; the ``<div>``
        wrapper is restored here - confirm it matches the intended markup.
        """
        if text is None:
            text = ''
        class_name = f'{category_name}-{text.lower()}'
        return f'<div class="{class_name}">{text}</div>'

    def to_df(self) -> pd.DataFrame:
        """Build the display table from ``self.data``.

        Works on a deep copy so that ``self.data`` keeps its raw ``arxiv``,
        ``github`` and ``tags`` lists while the table gets rendered strings.

        Returns:
            A DataFrame with one row per Space URL and the columns listed
            in ``column_names``; an empty frame with those columns when
            there is no data.
        """
        data = copy.deepcopy(self.data)
        if not data:
            # `.loc` on an empty frame would raise KeyError for the columns.
            return pd.DataFrame(columns=self.column_names)

        for url, info in data.items():
            info['title'] = self.create_link(info['title'], url)
            # The helpers read from self.data, which still holds the lists.
            info['arxiv'] = self.get_arxiv_link(url)
            info['github'] = self.get_github_link(url)
            info['tags'] = self.get_tag_list(url)
        df = pd.DataFrame(data).T.loc[:, self.column_names]
        return df