yoinked committed on
Commit
5ad8cbc
·
verified ·
1 Parent(s): c8e625b
Files changed (1) hide show
  1. app.py +181 -0
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import datetime
import html
import os

import gradio as gr
import huggingface_hub as hf
4
# Path prefix that is prepended to every paper file name and glob pattern.
datasetdir = "datasets/yoinked/blue-arxiv-papers/"

# Hugging Face Hub file-system handle shared by all read/write helpers.
# The token comes from the HF_TOKEN environment variable; a missing variable
# raises KeyError at import time.
fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])
6
# Stylesheet passed to gr.Blocks(css=...).
#   .container / .title / .extra-info / .extra-info-paperid style one paper
#   card as produced by make_paper_card; .containers is the flex wrapper that
#   make_paper_cards puts around the list of cards.
basecss = """
.container {
    width: 250px;
    padding: 20px;
    border: 3px solid black;
    border-radius: 15px;
    text-align: left;
}

.title {
    font-size: 24px;
    margin-bottom: 10px;
    text-align: center;
}

.containers {
    display: flex; gap: 20px; flex-wrap: wrap;
}
.extra-info {
    font-size: 14px;
    line-height: 1.5;
}
.extra-info-paperid {
    font-size: 14px;
    line-height: 1.5;
    color: #222;
}"""
33
# JavaScript passed to gr.Blocks(js=...).
#
# Gradio treats this string as a single function and calls it once, without
# arguments, when the page loads; names declared inside it are not global.
# A bare `function copyToClipboard(container) {...}` would therefore be
# invoked with `container` undefined and would never be reachable from the
# cards' inline onclick="copyToClipboard(this)" attribute.  The loader below
# instead registers the handler on `window` so the inline attribute resolves.
#
# The handler copies the text of the clicked card's ".title" element to the
# clipboard via a temporary <textarea>.
jscode = """
() => {
    window.copyToClipboard = function (container) {
        const titleElement = container.querySelector(".title");
        if (!titleElement) {
            return;
        }
        const titleText = titleElement.textContent;
        const tempTextArea = document.createElement("textarea");
        tempTextArea.value = titleText;
        document.body.appendChild(tempTextArea);
        tempTextArea.select();
        document.execCommand("copy");
        document.body.removeChild(tempTextArea);
    };
}
"""
46
def get_papers():
    """Return the paths under ``datasetdir`` that match the ``**.md`` glob."""
    pattern = f"{datasetdir}**.md"
    return fs.glob(pattern)
48
def _front_matter_value(metadata, key, default=None):
    """Return the value of the first ``key: value`` line in *metadata*.

    Only lines that begin with ``key: `` (ignoring surrounding whitespace) are
    considered, so a key name appearing inside another field's value is not
    mistaken for a field of its own.  Returns *default* when no line matches.
    """
    prefix = f"{key}: "
    for line in metadata.splitlines():
        stripped = line.strip()
        if stripped.startswith(prefix):
            return stripped[len(prefix):]
    return default


def get_papers_metadata(papiers=None):
    """Read the front matter of each paper and return it as a list of dicts.

    Args:
        papiers: Iterable of paper file paths.  When ``None``, the paths
            returned by ``get_papers()`` are used.

    Returns:
        One dict per usable paper with the keys ``fname``, ``metadata`` (the
        raw text between the first two ``---`` markers), ``author``,
        ``title``, ``tags`` (list of stripped, non-empty strings),
        ``abstract``, ``date_published`` and ``paperid``.  Missing text fields
        default to ``"unknown"``.  Papers without a ``---`` delimited header
        or without a ``paperid`` line are skipped, because they cannot be
        opened from the UI.
    """
    if papiers is None:
        papiers = get_papers()

    metadatas = []
    for paper in papiers:
        # read_text is the fsspec call that returns the file content as str.
        papertxt = fs.read_text(paper, encoding="utf-8")

        # Split at most twice: header delimiters only, never the body.
        sections = papertxt.split("---", 2)
        if len(sections) < 2:
            continue  # no front matter at all; skip instead of crashing
        metadata = sections[1]

        paperid = _front_matter_value(metadata, "paperid")
        if paperid is None:
            continue  # without an id there is no reason to display the paper

        raw_tags = _front_matter_value(metadata, "tags", "")
        metadatas.append({
            "fname": paper,
            "metadata": metadata,
            "author": _front_matter_value(metadata, "author", "unknown"),
            "title": _front_matter_value(metadata, "title", "unknown"),
            # Accept both "a, b" and "a,b" and drop empty entries.
            "tags": [tag.strip() for tag in raw_tags.split(",") if tag.strip()],
            "abstract": _front_matter_value(metadata, "abstract", "unknown"),
            "date_published": _front_matter_value(
                metadata, "date_published", "unknown"
            ),
            "paperid": paperid,
        })
    return metadatas
82
+
83
def make_paper_card(md):
    """Render one paper's metadata as an HTML card.

    Args:
        md: Mapping with the keys ``abstract``, ``title``, ``author``,
            ``date_published`` and ``paperid``.

    Returns:
        An HTML snippet: a ``.container`` div whose tooltip is the abstract
        and whose click handler is ``copyToClipboard(this)``.

    All values are HTML-escaped (quotes included) because they originate from
    user-submitted front matter; unescaped, a quote in the abstract would
    break out of the ``title`` attribute and markup in any field would be
    injected into the page.
    """
    abstract = html.escape(str(md["abstract"]), quote=True)
    title = html.escape(str(md["title"]), quote=True)
    author = html.escape(str(md["author"]), quote=True)
    published = html.escape(str(md["date_published"]), quote=True)
    paperid = html.escape(str(md["paperid"]), quote=True)
    return f"""
    <div class="container" onclick="copyToClipboard(this)" title="{abstract}">
        <div class="title">{title}</div>
        <br><br>
        <div class="extra-info">author: {author}</div>
        <div class="extra-info">published: {published}</div>
        <div class="extra-info-paperid">id: {paperid}</div>
    </div>
    """
94
+
95
def make_paper_cards(tags=None):
    """Build the HTML for all paper cards, optionally filtered by tag.

    Args:
        tags: Comma-separated tag names, or ``None`` / an empty string to
            list every paper.  A paper is kept when it carries at least one
            of the requested tags (exact match after stripping whitespace).

    Returns:
        A ``<div class='containers'>`` element wrapping one card per paper,
        each followed by ``<br>``.
    """
    mds = get_papers_metadata()

    # Normalise the query first: None and blank entries mean "no filter".
    wanted = {tag.strip() for tag in (tags or "").split(",")}
    wanted.discard("")

    if wanted:
        mds = [
            md for md in mds
            if not wanted.isdisjoint(tag.strip() for tag in md["tags"])
        ]

    cards = "".join(make_paper_card(md) + "<br>" for md in mds)
    return "<div class='containers'>" + cards + "</div>"
107
+
108
def get_paper_markdown(paperid):
    """Return the markdown body of the paper whose id equals *paperid*.

    Args:
        paperid: Paper id as typed by the user; surrounding whitespace is
            ignored and ``None`` is treated as an empty id.

    Returns:
        Everything after the closing ``---`` of the front matter, or
        ``"## paper not found"`` when no paper has that id.  A file that has
        no body after its header yields an empty string.
    """
    wanted = (paperid or "").strip()
    fname = next(
        (
            paper["fname"]
            for paper in get_papers_metadata()
            if paper["paperid"].strip() == wanted
        ),
        None,
    )
    if fname is None:
        return "## paper not found"

    # maxsplit=2 keeps any further "---" (horizontal rules, tables) that
    # occur inside the markdown body instead of truncating the paper there.
    sections = fs.read_text(fname, encoding="utf-8").split("---", 2)
    return sections[2] if len(sections) > 2 else ""
120
+
121
def _one_line(value):
    """Collapse *value* to a single line so it cannot break the front matter."""
    return " ".join(str(value or "").split())


def publish_paper(title, authors, tags, abst, data):
    """Write a new paper file consisting of front matter plus markdown body.

    The paper id has the form ``YYYY-MMDD.N`` where ``N`` is the first number
    in 1..100 for which ``datasetdir + id + ".md"`` does not exist yet.

    Args:
        title: Paper title.
        authors: Author name(s).
        tags: Comma-separated tags.
        abst: Short abstract (shown as the card tooltip).
        data: Markdown body of the paper.

    Returns:
        A status message: the new paper id on success, or an explanation when
        all 100 ids for today are already taken.
    """
    # Take the date once so year, month and day cannot straddle midnight.
    today = datetime.date.today()
    id_prefix = f"{today.year}-{today.month:02d}{today.day:02d}"
    date_published = f"{today.year}-{today.month:02d}-{today.day:02d}"

    for idx in range(1, 101):
        paperid = f"{id_prefix}.{idx}"
        path = f"{datasetdir}{paperid}.md"
        if not fs.exists(path):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    # Header values are flattened to one line each: the reader parses the
    # header line by line, so an embedded newline would truncate the value
    # and could smuggle in extra keys such as a forged "paperid: ".
    metadata = (
        "---\n"
        f"title: {_one_line(title)}\n"
        f"author: {_one_line(authors)}\n"
        f"tags: {_one_line(tags)}\n"
        f"abstract: {_one_line(abst)}\n"
        f"date_published: {date_published}\n"
        f"paperid: {paperid}\n"
        "---\n"
    )
    with fs.open(path, "w", encoding="utf-8") as f:
        f.write(metadata + (data or ""))
    return f"published as {paperid}"
149
# UI definition: three tabs (search, read, publish) wired to the helper
# functions above.  Event handlers are registered inside the Blocks context.
with gr.Blocks(css=basecss, js=jscode, theme='NoCrypt/miku') as demo:
    # --- search: filter papers by tag and show them as cards ---------------
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            searchbutton = gr.Button("search")
        with gr.Row():
            papercards = gr.HTML()

    # --- read: render one paper's markdown by id ---------------------------
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()

    # --- publish: submit a new paper, with optional live preview -----------
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is an integer line count (was the float literal 1e3).
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        with gr.Accordion("preview"):
            previewbutton = gr.Button("preview")
            preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")

    # Preview simply echoes the markdown source into the Markdown component.
    previewbutton.click(fn=lambda x: x, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)

demo.launch()