Spaces:
Running
Running
app
Browse files
app.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import huggingface_hub as hf
|
2 |
+
import gradio as gr
|
3 |
+
import os, datetime
|
4 |
+
# Hub filesystem handle shared by every function below. The token is read
# from the environment; a missing HF_TOKEN raises KeyError at import time.
fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])
# Path prefix that paper files are globbed from and written to.
datasetdir = "datasets/yoinked/blue-arxiv-papers/"
|
6 |
+
basecss = """
|
7 |
+
.container {
|
8 |
+
width: 250px;
|
9 |
+
padding: 20px;
|
10 |
+
border: 3px solid black;
|
11 |
+
border-radius: 15px;
|
12 |
+
text-align: left;
|
13 |
+
}
|
14 |
+
|
15 |
+
.title {
|
16 |
+
font-size: 24px;
|
17 |
+
margin-bottom: 10px;
|
18 |
+
text-align: center;
|
19 |
+
}
|
20 |
+
|
21 |
+
.containers {
|
22 |
+
display: flex; gap: 20px; flex-wrap: wrap;
|
23 |
+
}
|
24 |
+
.extra-info {
|
25 |
+
font-size: 14px;
|
26 |
+
line-height: 1.5;
|
27 |
+
}
|
28 |
+
.extra-info-paperid {
|
29 |
+
font-size: 14px;
|
30 |
+
line-height: 1.5;
|
31 |
+
color: #222;
|
32 |
+
}"""
|
33 |
+
jscode = """
|
34 |
+
|
35 |
+
function copyToClipboard(container) {
|
36 |
+
const titleElement = container.querySelector(".title");
|
37 |
+
const titleText = titleElement.textContent;
|
38 |
+
const tempTextArea = document.createElement("textarea");
|
39 |
+
tempTextArea.value = titleText;
|
40 |
+
document.body.appendChild(tempTextArea);
|
41 |
+
tempTextArea.select();
|
42 |
+
document.execCommand("copy");
|
43 |
+
document.body.removeChild(tempTextArea);
|
44 |
+
}
|
45 |
+
"""
|
46 |
+
def get_papers():
    """Return the paths matched by the ``**.md`` glob under ``datasetdir``."""
    pattern = datasetdir + "**.md"
    return fs.glob(pattern)
|
48 |
+
def get_papers_metadata(papiers=None):
    """Collect the front-matter metadata of stored papers.

    Args:
        papiers: Iterable of paper file paths to inspect. When ``None`` the
            paths returned by ``get_papers()`` are used.

    Returns:
        A list with one dict per displayable paper, holding the keys
        ``fname``, ``metadata`` (the raw front-matter text), ``author``,
        ``title``, ``tags`` (list of str), ``abstract``, ``date_published``
        and ``paperid``. Missing text fields default to ``"unknown"`` and a
        missing tags line to ``[]``. Files without front matter or without
        a ``paperid`` entry are left out of the result.
    """
    if papiers is None:
        papiers = get_papers()

    metadatas = []
    for paper in papiers:
        # read_text decodes the file to str, which the splits below require.
        sections = fs.read_text(paper).split("---")
        if len(sections) < 2:
            # No front matter: skip this file rather than fail the listing.
            continue
        metadata = sections[1]

        # Parse "key: value" lines. Matching whole lines (instead of
        # searching for "author: " anywhere in the text) keeps a value that
        # happens to contain another key's name from being misread.
        fields = {}
        for line in metadata.splitlines():
            key, sep, value = line.partition(": ")
            if sep:
                # First occurrence of a key wins.
                fields.setdefault(key.strip(), value.strip())

        if "paperid" not in fields:
            # Without an id the paper cannot be opened, so do not list it.
            continue

        if "tags" in fields:
            # Split on bare commas and strip, so "a,b" and "a, b" agree with
            # how make_paper_cards parses the search query.
            tags = [tag.strip() for tag in fields["tags"].split(",")]
        else:
            tags = []

        metadatas.append(
            {
                "fname": paper,
                "metadata": metadata,
                "author": fields.get("author", "unknown"),
                "title": fields.get("title", "unknown"),
                "tags": tags,
                "abstract": fields.get("abstract", "unknown"),
                "date_published": fields.get("date_published", "unknown"),
                "paperid": fields["paperid"],
            }
        )
    return metadatas
|
82 |
+
|
83 |
+
def make_paper_card(md):
    """Render one paper's metadata as an HTML card.

    Args:
        md: Metadata dict with the keys ``title``, ``author``,
            ``date_published``, ``paperid`` and ``abstract``.

    Returns:
        HTML for a ``div.container`` card. The abstract becomes the card's
        tooltip (``title`` attribute) and clicking the card calls the
        ``copyToClipboard`` JavaScript function.
    """
    # Imported here because the file does not import ``html`` at module level.
    from html import escape

    # The values are user-submitted text, so escape them before they are
    # placed into markup; escape() also encodes quotes, which the tooltip
    # attribute needs.
    title = escape(str(md["title"]))
    author = escape(str(md["author"]))
    published = escape(str(md["date_published"]))
    paperid = escape(str(md["paperid"]))
    abstract = escape(str(md["abstract"]))

    return f"""
<div class="container" onclick="copyToClipboard(this)" title="{abstract}">
<div class="title">{title}</div>
<br><br>
<div class="extra-info">author: {author}</div>
<div class="extra-info">published: {published}</div>
<div class="extra-info-paperid">id: {paperid}</div>
</div>
"""
|
94 |
+
|
95 |
+
def make_paper_cards(tags=None):
    """Build the HTML gallery of paper cards, optionally filtered by tag.

    Args:
        tags: Comma-separated tag names. ``None`` or a blank string means
            no filtering.

    Returns:
        A ``div.containers`` HTML string holding one card (followed by
        ``<br>``) for each paper that carries at least one requested tag,
        or for every paper when no tag was requested.
    """
    mds = get_papers_metadata()

    # Normalise the query before deciding whether to filter. Blank entries
    # (empty search box, trailing comma) are dropped so that an empty query
    # lists everything instead of matching nothing.
    wanted = [tag.strip() for tag in (tags or "").split(",")]
    wanted = [tag for tag in wanted if tag]
    if wanted:
        mds = [md for md in mds if any(tag in md["tags"] for tag in wanted)]

    cards = "".join(make_paper_card(md) + "<br>" for md in mds)
    return "<div class='containers'>" + cards + "</div>"
|
107 |
+
|
108 |
+
def get_paper_markdown(paperid):
    """Return the markdown body of the paper with the given id.

    Args:
        paperid: Paper id to look up among the ``paperid`` metadata values;
            surrounding whitespace is ignored.

    Returns:
        The text that follows the front-matter block of the matching file,
        or the string ``"## paper not found"`` when no paper has that id.
    """
    wanted = (paperid or "").strip()
    fname = next(
        (entry["fname"] for entry in get_papers_metadata() if entry["paperid"] == wanted),
        None,
    )
    if fname is None:
        return "## paper not found"

    # maxsplit=2 stops after the front matter, so "---" rules or table
    # separators inside the body no longer cut the paper short.
    parts = fs.read_text(fname).split("---", 2)
    return parts[2] if len(parts) > 2 else ""
|
120 |
+
|
121 |
+
def publish_paper(title, authors, tags, abst, data):
    """Store a new paper as a markdown file with front-matter metadata.

    The paper id has the form ``YYYY-MMDD.N`` where ``N`` is the first
    number from 1 to 100 whose file does not exist yet.

    Args:
        title: Paper title.
        authors: Author name(s).
        tags: Comma-separated tags.
        abst: Short abstract.
        data: Markdown body of the paper.

    Returns:
        A status message: ``"published as <paperid>"`` on success, or an
        explanation when all 100 ids for today are taken.
    """
    # Take a single timestamp so the date parts cannot straddle midnight.
    now = datetime.datetime.now()
    id_prefix = now.strftime("%Y-%m%d")
    date_published = now.strftime("%Y-%m-%d")

    for idx in range(1, 101):
        paperid = f"{id_prefix}.{idx}"
        if not fs.exists(datasetdir + paperid + ".md"):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    def one_line(value):
        # Each field is read back up to the end of its line, so embedded
        # newlines would truncate the value or corrupt the next field.
        return " ".join(str(value or "").split())

    metadata = (
        "---\n"
        f"title: {one_line(title)}\n"
        f"author: {one_line(authors)}\n"
        f"tags: {one_line(tags)}\n"
        f"abstract: {one_line(abst)}\n"
        f"date_published: {date_published}\n"
        f"paperid: {paperid}\n"
        "---\n"
    )
    with fs.open(datasetdir + paperid + ".md", "w") as f:
        f.write(metadata + (data or ""))

    # Report success so the status box is not left empty.
    return f"published as {paperid}"
|
149 |
+
# Three-tab UI: search paper cards by tag, read a paper by id, publish a paper.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation; confirm the Row/Accordion layout against the
# deployed app.
# NOTE(review): presumably Blocks(js=...) must leave copyToClipboard globally
# reachable for the cards' onclick handlers; verify in the browser.
with gr.Blocks(css=basecss, js=jscode, theme='NoCrypt/miku') as demo:
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            searchbutton = gr.Button("search")
        with gr.Row():
            papercards = gr.HTML()
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is an integer count; the original passed the float 1e3.
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        with gr.Accordion("preview"):

            previewbutton = gr.Button("preview")
            preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")

    # Event wiring; listeners are registered while the Blocks context is open.
    # The preview simply echoes the markdown textbox into a Markdown component.
    previewbutton.click(fn=lambda x: x, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)
demo.launch()
|