# NOTE: page-capture residue preceding the source ("Spaces: Runtime error"); not part of the program.
import datetime
import html
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

# Directory containing this module; `list.yaml` is opened relative to it.
repo_dir = pathlib.Path(__file__).parent
class DemoList: | |
COLUMN_INFO = [ | |
['status', 'markdown'], | |
['hardware', 'markdown'], | |
['title', 'markdown'], | |
['owner', 'markdown'], | |
['arxiv', 'markdown'], | |
['github', 'markdown'], | |
['likes', 'number'], | |
['tags', 'str'], | |
['last_modified', 'str'], | |
['created', 'str'], | |
['sdk', 'markdown'], | |
['sdk_version', 'str'], | |
['suggested_hardware', 'markdown'], | |
['sleep_time', 'str'], | |
] | |
TO_TIME_STR = { | |
-1: '', | |
300: '5 minutes', | |
600: '10 minutes', | |
900: '15 minutes', | |
1800: '30 minutes', | |
3600: '1 hour', | |
36000: '10 hours', | |
86400: '24 hours', | |
172800: '48 hours', | |
259200: '72 hours', | |
604800: '1 week', | |
} | |
def __init__(self): | |
self.api = HfApi() | |
self._raw_data = self.load_data() | |
self.df_raw = pd.DataFrame(self._raw_data) | |
self.df = self.prettify_df() | |
def column_names(self): | |
return list(map(operator.itemgetter(0), self.COLUMN_INFO)) | |
def column_datatype(self): | |
return list(map(operator.itemgetter(1), self.COLUMN_INFO)) | |
def get_space_id(url: str) -> str: | |
return '/'.join(url.split('/')[-2:]) | |
def load_data(self) -> list[dict]: | |
with open(repo_dir / 'list.yaml') as f: | |
data = yaml.safe_load(f) | |
res = [] | |
for url in tqdm.auto.tqdm(list(data)): | |
space_id = self.get_space_id(url) | |
space_info = self.api.space_info(repo_id=space_id) | |
card = space_info.cardData | |
info = data[url] | |
for tag in ['arxiv', 'github', 'tags']: | |
if tag not in info: | |
info[tag] = [] | |
info['url'] = url | |
info['owner'] = space_id.split('/')[0] | |
info['title'] = card['title'] | |
info['sdk'] = card['sdk'] | |
info['sdk_version'] = card.get('sdk_version', '') | |
info['likes'] = space_info.likes | |
info['last_modified'] = space_info.lastModified | |
info['status'] = space_info.runtime['stage'] | |
info['sleep_time'] = space_info.runtime['gcTimeout'] or -1 | |
info['suggested_hardware'] = card.get('suggested_hardware', '') | |
info['hardware'] = space_info.runtime['hardware']['current'] | |
if info['hardware'] is None: | |
info['hardware'] = space_info.runtime['hardware']['requested'] | |
res.append(info) | |
return res | |
def get_arxiv_link(self, links: list[str]) -> str: | |
links = [self.create_link(link.split('/')[-1], link) for link in links] | |
return '\n'.join(links) | |
def get_github_link(self, links: list[str]) -> str: | |
links = [self.create_link('github', link) for link in links] | |
return '\n'.join(links) | |
def get_tag_list(self, tags: list[str]) -> str: | |
return ', '.join(tags) | |
def create_link(text: str, url: str) -> str: | |
return f'<a href={url} target="_blank">{text}</a>' | |
def to_div(self, text: str | None, category_name: str) -> str: | |
if text is None: | |
text = '' | |
class_name = f'{category_name}-{text.lower()}' | |
return f'<div class="{class_name}">{text}</div>' | |
def format_timestamp(timestamp: str) -> str: | |
s = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.000Z') | |
return s.strftime('%Y/%m/%d %H:%M:%S') | |
def prettify_df(self) -> pd.DataFrame: | |
new_rows = [] | |
for _, row in self.df_raw.copy().iterrows(): | |
new_row = { | |
'status': | |
self.to_div(row.status, 'status'), | |
'hardware': | |
self.to_div(row.hardware, 'hardware'), | |
'suggested_hardware': | |
self.to_div(row.suggested_hardware, 'hardware'), | |
'title': | |
self.create_link(row.title, row.url), | |
'owner': | |
self.create_link(row.owner, | |
f'https://huggingface.co/{row.owner}'), | |
'arxiv': | |
self.get_arxiv_link(row.arxiv), | |
'github': | |
self.get_github_link(row.github), | |
'likes': | |
row.likes, | |
'tags': | |
self.get_tag_list(row.tags), | |
'last_modified': | |
self.format_timestamp(row.last_modified), | |
'created': | |
self.format_timestamp(row.created), | |
'sdk': | |
self.to_div(row.sdk, 'sdk'), | |
'sdk_version': | |
row.sdk_version, | |
'sleep_time': | |
self.TO_TIME_STR[row.sleep_time], | |
} | |
new_rows.append(new_row) | |
df = pd.DataFrame(new_rows).loc[:, self.column_names] | |
return df | |