File size: 5,173 Bytes
0702f61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from __future__ import annotations

import re

import numpy as np
import pandas as pd
import requests
from huggingface_hub.hf_api import SpaceInfo


class PaperList:
    """Searchable, filterable HTML table of papers loaded from ``papers.csv``.

    The CSV is read once in ``__init__``. The code reads the columns
    ``title``, ``url``, ``authors``, ``pdf``, ``session``, ``arxiv``,
    ``github``, ``hf_space``, ``hf_model`` and ``hf_dataset``. A cell counts
    as "present" only when it holds a string (missing cells are NaN).
    """

    def __init__(self):
        self.organization_name = 'ECCV2022'
        self.table = pd.read_csv('papers.csv')
        self._preprcess_table()

        self.table_header = '''
            <tr>
                <td width="50%">Paper Title</td>
                <td width="22%">Authors</td>
                <td width="4%">pdf</td>
                <td width="4%">Session</td>
                <td width="4%">arXiv</td>
                <td width="4%">GitHub</td>
                <td width="4%">HF Spaces</td>
                <td width="4%">HF Models</td>
                <td width="4%">HF Datasets</td>
            </tr>'''

    @staticmethod
    def load_space_info(author: str,
                        timeout: float = 30.0) -> list[SpaceInfo]:
        """Fetch the Spaces owned by *author* from the Hugging Face Hub API.

        Args:
            author: User or organization name passed as the ``author`` query
                parameter.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller forever.

        Returns:
            One ``SpaceInfo`` per entry of the JSON response.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status.
        """
        path = 'https://huggingface.co/api/spaces'
        r = requests.get(path, params={'author': author}, timeout=timeout)
        # Fail loudly on an error response instead of trying to unpack an
        # error payload into SpaceInfo objects.
        r.raise_for_status()
        return [SpaceInfo(**x) for x in r.json()]

    @staticmethod
    def _resolve_space_urls(df: pd.DataFrame,
                            name2space: dict[str, str]) -> list:
        """Return the ``hf_space`` value to use for each row of *df*.

        An existing string in ``hf_space`` is kept. Otherwise the last path
        component of the ``github`` URL (lower-cased) is looked up in
        *name2space*; rows without a match get NaN.
        """
        resolved = []
        for space, github in zip(df['hf_space'], df['github']):
            if isinstance(space, str):
                resolved.append(space)
                continue
            if isinstance(github, str):
                # Strip a trailing slash so ".../repo/" still yields "repo".
                key = github.rstrip('/').split('/')[-1].lower()
            else:
                key = ''
            resolved.append(name2space.get(key, np.nan))
        return resolved

    def add_spaces_to_table(self, organization_name: str,
                            df: pd.DataFrame) -> pd.DataFrame:
        """Fill missing ``hf_space`` cells from the organization's Spaces.

        A Space is matched to a paper when the Space name equals the paper's
        GitHub repository name, compared case-insensitively.

        Args:
            organization_name: Hub organization whose Spaces are listed.
            df: Table with ``hf_space`` and ``github`` columns. It is
                modified in place.

        Returns:
            The same *df* object, with ``hf_space`` updated.
        """
        spaces = self.load_space_info(organization_name)
        name2space = {
            s.id.split('/')[1].lower(): f'https://huggingface.co/spaces/{s.id}'
            for s in spaces
        }
        df['hf_space'] = self._resolve_space_urls(df, name2space)
        return df

    @staticmethod
    def _link(url, label) -> str:
        """Return an anchor to *url* opening in a new tab, or '' if *url* is not a string."""
        if isinstance(url, str):
            return f'<a href="{url}" target="_blank">{label}</a>'
        return ''

    @classmethod
    def _row_to_html(cls, row) -> str:
        """Render one ``itertuples`` row of the table as an HTML ``<tr>``."""
        # NOTE(review): values are interpolated verbatim, so papers.csv is
        # assumed to contain HTML-safe text - confirm, or escape here.
        paper = cls._link(row.url, row.title) if isinstance(
            row.url, str) else row.title
        pdf = cls._link(row.pdf, 'pdf')
        arxiv = cls._link(row.arxiv, 'arXiv')
        github = cls._link(row.github, 'GitHub')
        hf_space = cls._link(row.hf_space, 'Space')
        hf_model = cls._link(row.hf_model, 'Model')
        hf_dataset = cls._link(row.hf_dataset, 'Dataset')
        return f'''
                <tr>
                    <td>{paper}</td>
                    <td>{row.authors}</td>
                    <td>{pdf}</td>
                    <td>{row.session}</td>
                    <td>{arxiv}</td>
                    <td>{github}</td>
                    <td>{hf_space}</td>
                    <td>{hf_model}</td>
                    <td>{hf_dataset}</td>
                </tr>'''

    def _preprcess_table(self) -> None:
        """Add the derived columns used by ``render``.

        Fills ``hf_space`` from the Hub, then adds ``title_lowercase`` for
        case-insensitive search and ``html_table_content``, the pre-rendered
        HTML row of each paper.
        """
        self.table = self.add_spaces_to_table(self.organization_name,
                                              self.table)
        self.table['title_lowercase'] = self.table.title.str.lower()
        self.table['html_table_content'] = [
            self._row_to_html(row) for row in self.table.itertuples()
        ]

    @staticmethod
    def _search_titles(df: pd.DataFrame, search_query: str,
                       case_sensitive: bool) -> pd.DataFrame:
        """Keep the rows of *df* whose title matches *search_query*.

        The query is used as a regular expression when it compiles as one,
        and as a literal substring otherwise, so input such as ``C++`` does
        not raise. Rows with a missing title never match. An empty query
        returns *df* unchanged.
        """
        if not search_query:
            return df
        if case_sensitive:
            titles, pattern = df.title, search_query
        else:
            titles, pattern = df.title_lowercase, search_query.lower()
        try:
            re.compile(pattern)
            use_regex = True
        except re.error:
            use_regex = False
        return df[titles.str.contains(pattern, regex=use_regex, na=False)]

    def render(self, search_query: str, case_sensitive: bool,
               filter_names: list[str],
               paper_sessions: list[str]) -> tuple[int, str]:
        """Build the HTML table for the current search and filter settings.

        Args:
            search_query: Text to look for in paper titles; empty disables
                the title search.
            case_sensitive: Whether the title search respects case.
            filter_names: Any of ``'arXiv'``, ``'GitHub'``, ``'HF Space'``,
                ``'HF Model'``, ``'HF Dataset'``; each one keeps only papers
                that have that resource.
            paper_sessions: Sessions to include; papers of other sessions
                are dropped.

        Returns:
            ``(number of matching papers, HTML table string)``.
        """
        # NOTE(review): this refreshes ``hf_space`` on every call, but the
        # cached ``html_table_content`` rows are not rebuilt, so a Space
        # found after start-up affects filtering but not the rendered link.
        df = self.add_spaces_to_table(self.organization_name, self.table)
        df = self._search_titles(df, search_query, case_sensitive)
        has_arxiv = 'arXiv' in filter_names
        has_github = 'GitHub' in filter_names
        has_hf_space = 'HF Space' in filter_names
        has_hf_model = 'HF Model' in filter_names
        has_hf_dataset = 'HF Dataset' in filter_names
        df = self.filter_table(df, has_arxiv, has_github, has_hf_space,
                               has_hf_model, has_hf_dataset, paper_sessions)
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(df: pd.DataFrame, has_arxiv: bool, has_github: bool,
                     has_hf_space: bool, has_hf_model: bool,
                     has_hf_dataset: bool,
                     paper_sessions: list[str]) -> pd.DataFrame:
        """Filter *df* by required resources and by session.

        Each ``has_*`` flag, when true, drops rows whose matching column is
        missing. Rows whose ``session`` is not in *paper_sessions* are always
        dropped.
        """
        required = {
            'arxiv': has_arxiv,
            'github': has_github,
            'hf_space': has_hf_space,
            'hf_model': has_hf_model,
            'hf_dataset': has_hf_dataset,
        }
        for column, needed in required.items():
            if needed:
                df = df[df[column].notna()]
        return df[df.session.isin(set(paper_sessions))]

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap *table_header* and the pre-rendered rows of *df* in a ``<table>``."""
        table_data = ''.join(df.html_table_content)
        html = f'''
        <table>
            {table_header}
            {table_data}
        </table>'''
        return html