Spaces:
Running
Running
Update
Browse files- .pre-commit-config.yaml +59 -35
- .style.yapf +0 -5
- README.md +1 -1
- app.py +26 -26
- paper_list.py +37 -38
- style.css +1 -4
.pre-commit-config.yaml
CHANGED
@@ -1,36 +1,60 @@
|
|
1 |
repos:
|
2 |
-
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
- repo: https://github.com/pre-commit/mirrors-mypy
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
repos:
  # General-purpose file checks and fixers.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: check-executables-have-shebangs
      - id: check-json
      - id: check-merge-conflict
      - id: check-shebang-scripts-are-executable
      - id: check-toml
      - id: check-yaml
      - id: end-of-file-fixer
      - id: mixed-line-ending
        args:
          - "--fix=lf"
      - id: requirements-txt-fixer
      - id: trailing-whitespace

  # Docstring formatting, applied in place.
  - repo: https://github.com/myint/docformatter
    rev: v1.7.5
    hooks:
      - id: docformatter
        args:
          - "--in-place"

  # Import sorting with the black-compatible profile.
  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort
        args:
          - "--profile"
          - "black"

  # Type checking; the extra packages supply third-party type stubs.
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.10.0
    hooks:
      - id: mypy
        args:
          - "--ignore-missing-imports"
        additional_dependencies:
          - "types-python-slugify"
          - "types-requests"
          - "types-PyYAML"
          - "types-pytz"

  # Code formatting at a 119-column line length.
  - repo: https://github.com/psf/black
    rev: 24.4.2
    hooks:
      - id: black
        language_version: python3.10
        args:
          - "--line-length"
          - "119"

  # Notebook hooks.
  - repo: https://github.com/kynan/nbstripout
    rev: 0.7.1
    hooks:
      - id: nbstripout
        args:
          - "--extra-keys"
          - "metadata.interpreter metadata.kernelspec cell.metadata.pycharm"
  - repo: https://github.com/nbQA-dev/nbQA
    rev: 1.8.5
    hooks:
      - id: nbqa-black
      - id: nbqa-pyupgrade
        args:
          - "--py37-plus"
      - id: nbqa-isort
        args:
          - "--float-to-top"
|
.style.yapf
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
[style]
|
2 |
-
based_on_style = pep8
|
3 |
-
blank_line_before_nested_class_or_def = false
|
4 |
-
spaces_before_comment = 2
|
5 |
-
split_before_logical_operator = true
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🏢
|
|
4 |
colorFrom: yellow
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
4 |
colorFrom: yellow
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.36.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
CHANGED
@@ -6,44 +6,44 @@ import gradio as gr
|
|
6 |
|
7 |
from paper_list import PaperList
|
8 |
|
9 |
-
DESCRIPTION =
|
10 |
-
NOTES =
|
11 |
- [NAACL 2022](https://2022.naacl.org/)
|
12 |
- [NAACL'22 Reproducibility Track](https://naacl2022-reproducibility-track.github.io/results/)
|
13 |
-
|
14 |
|
15 |
paper_list = PaperList()
|
16 |
|
17 |
-
with gr.Blocks(css=
|
18 |
gr.Markdown(DESCRIPTION)
|
19 |
|
20 |
search_box = gr.Textbox(
|
21 |
-
label=
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
)
|
25 |
-
case_sensitive = gr.Checkbox(label='Case Sensitive')
|
26 |
-
filter_names = gr.CheckboxGroup(label='Filter',
|
27 |
-
choices=[
|
28 |
-
'arXiv',
|
29 |
-
'GitHub',
|
30 |
-
'HF Space',
|
31 |
-
'HF Model',
|
32 |
-
'HF Dataset',
|
33 |
-
])
|
34 |
paper_category_names = [
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
]
|
41 |
-
paper_categories = gr.CheckboxGroup(
|
42 |
-
|
43 |
-
|
44 |
-
search_button = gr.Button(
|
45 |
|
46 |
-
number_of_papers = gr.Textbox(label=
|
47 |
table = gr.HTML(show_label=False)
|
48 |
|
49 |
gr.Markdown(NOTES)
|
|
|
6 |
|
7 |
from paper_list import PaperList
|
8 |
|
9 |
+
DESCRIPTION = "# NAACL 2022 Papers"
|
10 |
+
NOTES = """
|
11 |
- [NAACL 2022](https://2022.naacl.org/)
|
12 |
- [NAACL'22 Reproducibility Track](https://naacl2022-reproducibility-track.github.io/results/)
|
13 |
+
"""
|
14 |
|
15 |
paper_list = PaperList()
|
16 |
|
17 |
+
with gr.Blocks(css="style.css") as demo:
|
18 |
gr.Markdown(DESCRIPTION)
|
19 |
|
20 |
search_box = gr.Textbox(
|
21 |
+
label="Search Title", placeholder="You can search for titles with regular expressions. e.g. (?<!sur)face"
|
22 |
+
)
|
23 |
+
case_sensitive = gr.Checkbox(label="Case Sensitive")
|
24 |
+
filter_names = gr.CheckboxGroup(
|
25 |
+
label="Filter",
|
26 |
+
choices=[
|
27 |
+
"arXiv",
|
28 |
+
"GitHub",
|
29 |
+
"HF Space",
|
30 |
+
"HF Model",
|
31 |
+
"HF Dataset",
|
32 |
+
],
|
33 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
paper_category_names = [
|
35 |
+
"Long Paper",
|
36 |
+
"Short Paper",
|
37 |
+
"Special Theme Paper",
|
38 |
+
"Findings",
|
39 |
+
"Industry Track",
|
40 |
]
|
41 |
+
paper_categories = gr.CheckboxGroup(
|
42 |
+
label="Paper Categories", choices=paper_category_names, value=paper_category_names
|
43 |
+
)
|
44 |
+
search_button = gr.Button("Search")
|
45 |
|
46 |
+
number_of_papers = gr.Textbox(label="Number of Papers Found")
|
47 |
table = gr.HTML(show_label=False)
|
48 |
|
49 |
gr.Markdown(NOTES)
|
paper_list.py
CHANGED
@@ -5,10 +5,10 @@ import pandas as pd
|
|
5 |
|
6 |
class PaperList:
|
7 |
def __init__(self):
|
8 |
-
self.table = pd.read_csv(
|
9 |
self._preprcess_table()
|
10 |
|
11 |
-
self.table_header =
|
12 |
<tr>
|
13 |
<td width="50%">Paper</td>
|
14 |
<td width="22%">Authors</td>
|
@@ -19,28 +19,23 @@ class PaperList:
|
|
19 |
<td width="4%">HF Spaces</td>
|
20 |
<td width="4%">HF Models</td>
|
21 |
<td width="4%">HF Datasets</td>
|
22 |
-
</tr>
|
23 |
|
24 |
def _preprcess_table(self) -> None:
|
25 |
-
self.table[
|
26 |
|
27 |
rows = []
|
28 |
for row in self.table.itertuples():
|
29 |
-
paper = f'<a href="{row.url}" target="_blank">{row.title}</a>' if isinstance(
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
row.
|
37 |
-
|
38 |
-
|
39 |
-
hf_model = f'<a href="{row.hf_model}" target="_blank">Model</a>' if isinstance(
|
40 |
-
row.hf_model, str) else ''
|
41 |
-
hf_dataset = f'<a href="{row.hf_dataset}" target="_blank">Dataset</a>' if isinstance(
|
42 |
-
row.hf_dataset, str) else ''
|
43 |
-
row = f'''
|
44 |
<tr>
|
45 |
<td>{paper}</td>
|
46 |
<td>{row.authors}</td>
|
@@ -51,33 +46,37 @@ class PaperList:
|
|
51 |
<td>{hf_space}</td>
|
52 |
<td>{hf_model}</td>
|
53 |
<td>{hf_dataset}</td>
|
54 |
-
</tr>
|
55 |
rows.append(row)
|
56 |
-
self.table[
|
57 |
|
58 |
-
def render(
|
59 |
-
|
60 |
-
|
61 |
df = self.table
|
62 |
if search_query:
|
63 |
if case_sensitive:
|
64 |
df = df[df.title.str.contains(search_query)]
|
65 |
else:
|
66 |
df = df[df.title_lowercase.str.contains(search_query.lower())]
|
67 |
-
has_arxiv =
|
68 |
-
has_github =
|
69 |
-
has_hf_space =
|
70 |
-
has_hf_model =
|
71 |
-
has_hf_dataset =
|
72 |
-
df = self.filter_table(df, has_arxiv, has_github, has_hf_space,
|
73 |
-
has_hf_model, has_hf_dataset, paper_categories)
|
74 |
return len(df), self.to_html(df, self.table_header)
|
75 |
|
76 |
@staticmethod
|
77 |
-
def filter_table(
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
81 |
if has_arxiv:
|
82 |
df = df[~df.arxiv.isna()]
|
83 |
if has_github:
|
@@ -93,10 +92,10 @@ class PaperList:
|
|
93 |
|
94 |
@staticmethod
|
95 |
def to_html(df: pd.DataFrame, table_header: str) -> str:
|
96 |
-
table_data =
|
97 |
-
html = f
|
98 |
<table>
|
99 |
{table_header}
|
100 |
{table_data}
|
101 |
-
</table>
|
102 |
return html
|
|
|
5 |
|
6 |
class PaperList:
|
7 |
def __init__(self):
|
8 |
+
self.table = pd.read_csv("papers.csv")
|
9 |
self._preprcess_table()
|
10 |
|
11 |
+
self.table_header = """
|
12 |
<tr>
|
13 |
<td width="50%">Paper</td>
|
14 |
<td width="22%">Authors</td>
|
|
|
19 |
<td width="4%">HF Spaces</td>
|
20 |
<td width="4%">HF Models</td>
|
21 |
<td width="4%">HF Datasets</td>
|
22 |
+
</tr>"""
|
23 |
|
24 |
def _preprcess_table(self) -> None:
|
25 |
+
self.table["title_lowercase"] = self.table.title.str.lower()
|
26 |
|
27 |
rows = []
|
28 |
for row in self.table.itertuples():
|
29 |
+
paper = f'<a href="{row.url}" target="_blank">{row.title}</a>' if isinstance(row.url, str) else row.title
|
30 |
+
pdf = f'<a href="{row.pdf}" target="_blank">pdf</a>' if isinstance(row.pdf, str) else ""
|
31 |
+
arxiv = f'<a href="{row.arxiv}" target="_blank">arXiv</a>' if isinstance(row.arxiv, str) else ""
|
32 |
+
github = f'<a href="{row.github}" target="_blank">GitHub</a>' if isinstance(row.github, str) else ""
|
33 |
+
hf_space = f'<a href="{row.hf_space}" target="_blank">Space</a>' if isinstance(row.hf_space, str) else ""
|
34 |
+
hf_model = f'<a href="{row.hf_model}" target="_blank">Model</a>' if isinstance(row.hf_model, str) else ""
|
35 |
+
hf_dataset = (
|
36 |
+
f'<a href="{row.hf_dataset}" target="_blank">Dataset</a>' if isinstance(row.hf_dataset, str) else ""
|
37 |
+
)
|
38 |
+
row = f"""
|
|
|
|
|
|
|
|
|
|
|
39 |
<tr>
|
40 |
<td>{paper}</td>
|
41 |
<td>{row.authors}</td>
|
|
|
46 |
<td>{hf_space}</td>
|
47 |
<td>{hf_model}</td>
|
48 |
<td>{hf_dataset}</td>
|
49 |
+
</tr>"""
|
50 |
rows.append(row)
|
51 |
+
self.table["html_table_content"] = rows
|
52 |
|
53 |
+
def render(
    self, search_query: str, case_sensitive: bool, filter_names: list[str], paper_categories: list[str]
) -> tuple[int, str]:
    """Filter the paper table and render the surviving rows as HTML.

    Args:
        search_query: Regular expression searched for in each paper title.
            An empty string disables title filtering.
        case_sensitive: Whether the title search distinguishes letter case.
        filter_names: Labels of the resource filters that are switched on
            ("arXiv", "GitHub", "HF Space", "HF Model", "HF Dataset").
        paper_categories: Category names, forwarded unchanged to
            ``filter_table``.

    Returns:
        A ``(count, html)`` pair: the number of rows left after filtering
        and the markup produced by ``to_html`` for those rows.
    """
    df = self.table
    if search_query:
        # Ask the regex engine to ignore case instead of lower-casing the
        # pattern itself: lower-casing turns escapes such as \S, \D or \W
        # into \s, \d or \w and silently changes what the pattern matches.
        # na=False keeps the mask strictly boolean when a title is missing.
        title_matches = df.title.str.contains(search_query, case=case_sensitive, na=False)
        df = df[title_matches]

    # Translate the checkbox labels into the boolean switches filter_table expects.
    has_arxiv = "arXiv" in filter_names
    has_github = "GitHub" in filter_names
    has_hf_space = "HF Space" in filter_names
    has_hf_model = "HF Model" in filter_names
    has_hf_dataset = "HF Dataset" in filter_names
    df = self.filter_table(df, has_arxiv, has_github, has_hf_space, has_hf_model, has_hf_dataset, paper_categories)
    return len(df), self.to_html(df, self.table_header)
|
69 |
|
70 |
@staticmethod
|
71 |
+
def filter_table(
|
72 |
+
df: pd.DataFrame,
|
73 |
+
has_arxiv: bool,
|
74 |
+
has_github: bool,
|
75 |
+
has_hf_space: bool,
|
76 |
+
has_hf_model: bool,
|
77 |
+
has_hf_dataset: bool,
|
78 |
+
paper_categories: list[str],
|
79 |
+
) -> pd.DataFrame:
|
80 |
if has_arxiv:
|
81 |
df = df[~df.arxiv.isna()]
|
82 |
if has_github:
|
|
|
92 |
|
93 |
@staticmethod
|
94 |
def to_html(df: pd.DataFrame, table_header: str) -> str:
|
95 |
+
table_data = "".join(df.html_table_content)
|
96 |
+
html = f"""
|
97 |
<table>
|
98 |
{table_header}
|
99 |
{table_data}
|
100 |
+
</table>"""
|
101 |
return html
|
style.css
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
h1 {
|
2 |
text-align: center;
|
|
|
3 |
}
|
4 |
table a {
|
5 |
background-color: transparent;
|
@@ -16,7 +17,3 @@ a:hover {
|
|
16 |
table, th, td {
|
17 |
border: 1px solid;
|
18 |
}
|
19 |
-
img#visitor-badge {
|
20 |
-
display: block;
|
21 |
-
margin: auto;
|
22 |
-
}
|
|
|
1 |
h1 {
|
2 |
text-align: center;
|
3 |
+
display: block;
|
4 |
}
|
5 |
table a {
|
6 |
background-color: transparent;
|
|
|
17 |
table, th, td {
|
18 |
border: 1px solid;
|
19 |
}
|
|
|
|
|
|
|
|