File size: 3,021 Bytes
e51e8f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import copy
import datetime
import os
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

# Directory that contains this file; 'list.yaml' is opened relative to it.
repo_dir = pathlib.Path(__file__).parent

# Hugging Face access token read from the environment and handed to HfApi.
# os.getenv returns None when the variable is not set.
HF_TOKEN = os.getenv('HF_TOKEN')


class DemoList:
    """Table of the Hugging Face Spaces listed in ``list.yaml``.

    Attributes:
        api: ``HfApi`` client used to query Space metadata.
        data: Mapping of Space URL (the keys of ``list.yaml``) to a dict
            holding the YAML fields plus the fields added by ``load_data``.
        df: ``data`` rendered as a pandas DataFrame by ``to_df``.
    """

    # Column order of the DataFrame produced by ``to_df``.
    _COLUMNS = (
        'title',
        'arxiv',
        'github',
        'likes',
        'tags',
        'last_modified',
        'created',
        'sdk',
        'sdk_version',
        'status',
        'hardware',
    )
    # Layout of ``space_info.lastModified``. ``%f`` accepts one to six
    # fractional-second digits, so values other than ".000" parse as well.
    _HUB_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
    # Layout of the ``created`` field stored in ``list.yaml``.
    _CREATED_TIME_FORMAT = '%Y-%m-%d-%H-%M-%S'
    # Layout used for every timestamp shown in the table.
    _DISPLAY_TIME_FORMAT = '%Y/%m/%d %H:%M:%S'

    def __init__(self):
        """Create the API client and build ``data`` and ``df`` once."""
        self.api = HfApi(token=HF_TOKEN)
        self.data = self.load_data()
        self.df = self.to_df()

    def update_data(self) -> None:
        """Reload ``list.yaml``, re-query the Hub and rebuild ``df``."""
        self.data = self.load_data()
        self.df = self.to_df()

    @classmethod
    def _reformat_time(cls, value: str, source_format: str) -> str:
        """Parse ``value`` with ``source_format`` and return it in display form.

        Raises:
            ValueError: If ``value`` does not match ``source_format``.
        """
        parsed = datetime.datetime.strptime(value, source_format)
        return parsed.strftime(cls._DISPLAY_TIME_FORMAT)

    def load_data(self) -> dict:
        """Read ``list.yaml`` and enrich every entry with Hub metadata.

        For each URL key the Space is looked up through ``self.api`` and the
        entry gains ``title``, ``sdk``, ``sdk_version``, ``likes``,
        ``last_modified``, ``status`` and ``hardware``; the existing
        ``created`` value is rewritten in the display time format.

        Returns:
            The mapping of Space URL to its metadata dict. An empty
            ``list.yaml`` yields an empty dict.

        Raises:
            KeyError: If an entry lacks ``created`` or the Space card lacks
                ``title`` or ``sdk``.
            ValueError: If a timestamp does not match its expected layout.
        """
        with open(repo_dir / 'list.yaml', encoding='utf-8') as f:
            # safe_load returns None for an empty document.
            data = yaml.safe_load(f) or {}
        # Only the inner dicts are mutated, so iterating items() is safe.
        for url, info in tqdm.auto.tqdm(data.items()):
            space_info = self.api.space_info(repo_id=self.get_space_id(url))
            card = space_info.cardData
            info['title'] = card['title']
            info['sdk'] = card['sdk']
            info['sdk_version'] = card.get('sdk_version', '')
            info['likes'] = space_info.likes
            info['last_modified'] = self._reformat_time(
                space_info.lastModified, self._HUB_TIME_FORMAT)
            info['created'] = self._reformat_time(info['created'],
                                                  self._CREATED_TIME_FORMAT)
            info['status'] = space_info.runtime['stage']
            info['hardware'] = space_info.runtime['hardware']['current']
        return data

    @staticmethod
    def get_space_id(url: str) -> str:
        """Return the last two ``/``-separated parts of ``url`` joined by ``/``."""
        return '/'.join(url.split('/')[-2:])

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an HTML anchor opening ``url`` in a new tab, labelled ``text``.

        The ``href`` value is quoted so that URLs containing characters such
        as ``=`` or spaces still form a well-delimited attribute.
        """
        return f'<a href="{url}" target="_blank">{text}</a>'

    def get_arxiv_link(self, url: str) -> str:
        """Return newline-joined anchors for the entry's ``arxiv`` URLs.

        Links are sorted, and each is labelled with the last path component
        of its URL. An entry without ``arxiv`` yields an empty string.
        """
        papers = sorted(self.data[url].get('arxiv', []))
        return '\n'.join(
            self.create_link(paper.split('/')[-1], paper) for paper in papers)

    def get_github_link(self, url: str) -> str:
        """Return newline-joined anchors, each labelled ``github``.

        Links come from the entry's ``github`` list, sorted. An entry without
        ``github`` yields an empty string.
        """
        repos = sorted(self.data[url].get('github', []))
        return '\n'.join(self.create_link('github', repo) for repo in repos)

    def get_tag_list(self, url: str) -> str:
        """Return the entry's ``tags`` sorted and joined with ``', '``."""
        return ', '.join(sorted(self.data[url].get('tags', [])))

    def to_df(self) -> pd.DataFrame:
        """Render ``self.data`` as a DataFrame with one row per Space URL.

        ``title`` becomes a link to the Space, ``arxiv`` and ``github`` become
        link lists and ``tags`` a comma-separated string. ``self.data`` itself
        is left untouched because the work is done on a deep copy.

        Returns:
            A DataFrame indexed by URL with the columns in ``_COLUMNS``. When
            there are no entries, an empty DataFrame with those columns.
        """
        columns = list(self._COLUMNS)
        if not self.data:
            # Selecting columns from an empty frame would raise KeyError.
            return pd.DataFrame(columns=columns)
        data = copy.deepcopy(self.data)
        for url, info in data.items():
            info['title'] = self.create_link(info['title'], url)
            info['arxiv'] = self.get_arxiv_link(url)
            info['github'] = self.get_github_link(url)
            info['tags'] = self.get_tag_list(url)
        return pd.DataFrame(data).T.loc[:, columns]