"""Gradio app to search, read and publish markdown papers kept in a Hugging Face dataset repo.

Each paper is one ``.md`` file under ``datasetdir``: a ``---``-delimited
front matter of ``key: value`` lines followed by the markdown body.
"""
import datetime
import html
import os

import gradio as gr
import huggingface_hub as hf

fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])
datasetdir = "datasets/yoinked/blue-arxiv-papers/"

basecss = """
.caaard-container {
    width: 250px;
    padding: 20px;
    border: 3px solid black;
    border-radius: 15px;
    text-align: left;
}
.title {
    font-size: 24px;
    margin-bottom: 10px;
    text-align: center;
}
.caaard-containers {
    display: flex;
    gap: 20px;
    flex-wrap: wrap;
}
.extra-info {
    font-size: 14px;
    line-height: 1.5;
}
.extra-info-paperid {
    font-size: 14px;
    line-height: 1.5;
    color: #222;
}"""

jscode = """
function copyToClipboard(container) {
    const titleElement = container.querySelector(".title");
    const titleText = titleElement.textContent;
    const tempTextArea = document.createElement("textarea");
    tempTextArea.value = titleText;
    document.body.appendChild(tempTextArea);
    tempTextArea.select();
    document.execCommand("copy");
    document.body.removeChild(tempTextArea);
}
"""

# Applied to user-supplied metadata before it is written to the front matter:
# "<>:|\\" are deleted (primitive anti-XSS; ":" is also the key/value
# separator), and line breaks become spaces because the front matter is
# parsed one line per field.
_FIELD_CLEANUP = str.maketrans("\r\n", "  ", "<>:|\\")


def get_papers():
    """Return the paths of the ``.md`` files matched under ``datasetdir``."""
    return fs.glob(datasetdir + "**.md")


def _split_front_matter(text):
    """Split a paper file into ``(metadata, body)``; return None without front matter.

    Splitting at most twice keeps ``---`` horizontal rules inside the body
    intact instead of truncating the paper at the first one.
    """
    parts = text.split("---", 2)
    if len(parts) < 2:
        return None
    body = parts[2] if len(parts) > 2 else ""
    return parts[1], body


def _parse_fields(metadata):
    """Parse ``key: value`` lines into a dict; the first occurrence of a key wins."""
    fields = {}
    for line in metadata.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            fields.setdefault(key.strip(), value.strip())
    return fields


def get_papers_metadata(papiers=None):
    """Read the front matter of every paper and return one dict per paper.

    Args:
        papiers: Paths to read; defaults to the result of ``get_papers()``.

    Returns:
        A list of dicts with the keys ``fname``, ``metadata``, ``author``,
        ``title``, ``tags`` (list of str), ``abstract``, ``date_published``
        and ``paperid``. Missing text fields are ``"unknown"``. Files without
        front matter or without a ``paperid`` are skipped, since they could
        not be opened from the "read" tab anyway.
    """
    if papiers is None:
        papiers = get_papers()

    metadatas = []
    for paper in papiers:
        # read_text is the documented fsspec call for fetching a file as str.
        front = _split_front_matter(fs.read_text(paper))
        if front is None:
            continue
        metadata, _body = front
        fields = _parse_fields(metadata)

        paperid = fields.get("paperid")
        if not paperid:
            continue

        # Split on "," and strip, mirroring how the search query is split,
        # so both "a,b" and "a, b" yield the tags ["a", "b"].
        tags = [tag.strip() for tag in fields.get("tags", "").split(",")]
        tags = [tag for tag in tags if tag]

        metadatas.append({
            "fname": paper,
            "metadata": metadata,
            "author": fields.get("author", "unknown"),
            "title": fields.get("title", "unknown"),
            "tags": tags,
            "abstract": fields.get("abstract", "unknown"),
            "date_published": fields.get("date_published", "unknown"),
            "paperid": paperid,
        })
    return metadatas


def make_paper_card(md):
    """Render one metadata dict (see ``get_papers_metadata``) as an HTML card.

    Every value is HTML-escaped because the paper files are untrusted input.
    """
    # NOTE(review): the markup of this template was missing from the source
    # under review (only the text and a stray ">" remained). It is
    # reconstructed from the class names in ``basecss``, the ".title" lookup
    # in ``jscode`` and the "aka tooltip" label of the abstract box —
    # confirm against the intended design.
    title = html.escape(str(md["title"]))
    author = html.escape(str(md["author"]))
    published = html.escape(str(md["date_published"]))
    paperid = html.escape(str(md["paperid"]))
    tooltip = html.escape(str(md.get("abstract", "")))
    return f"""<div class="caaard-container" title="{tooltip}" onclick="copyToClipboard(this)">
<div class="title">{title}</div>
<div class="extra-info">author: {author}</div>
<div class="extra-info">published: {published}</div>
<div class="extra-info-paperid">id: {paperid}</div>
</div>
"""


def make_paper_cards(tags=None):
    """Return the HTML for all paper cards, optionally filtered by tag.

    Args:
        tags: Comma-separated tag names. ``None``, an empty string or a
            string of only separators disables the filter. Otherwise a paper
            is kept when it carries at least one of the requested tags.
    """
    mds = get_papers_metadata()
    wanted = {tag.strip() for tag in (tags or "").split(",")} - {""}
    if wanted:
        mds = [md for md in mds if wanted.intersection(md["tags"])]
    cards = "\n".join(make_paper_card(md) for md in mds)
    return f'<div class="caaard-containers">\n{cards}\n</div>\n'


def get_paper_markdown(paperid):
    """Return the markdown body of the paper with the given id.

    Returns ``"## paper not found"`` when no paper has that id.
    """
    wanted = (paperid or "").strip()
    fname = None
    for paper in get_papers_metadata():
        if paper["paperid"] == wanted:
            fname = paper["fname"]
            break
    if fname is None:
        return "## paper not found"

    front = _split_front_matter(fs.read_text(fname))
    return front[1] if front is not None else ""


def publish_paper(title, authors, tags, abst, data):
    """Write a new paper file and return a status message for the UI.

    The id is ``<year>-<month><day>.<n>`` with ``n`` the first free number
    from 1 to 100 for today's date.

    Args:
        title, authors, tags, abst: Front-matter fields; they are sanitized
            with ``_FIELD_CLEANUP`` before being written.
        data: Markdown body, written unchanged after the front matter.

    Returns:
        A human-readable status string (success or the reason for failure).
    """
    # One timestamp for everything, so id and date cannot straddle midnight.
    now = datetime.datetime.now()
    day_stamp = now.strftime("%Y-%m%d")
    date_published = now.strftime("%Y-%m-%d")

    for idx in range(1, 101):
        paperid = f"{day_stamp}.{idx}"
        if not fs.exists(datasetdir + paperid + ".md"):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    title, authors, tags, abst = (
        (value or "").translate(_FIELD_CLEANUP)
        for value in (title, authors, tags, abst)
    )

    metadata = "\n".join([
        "---",
        f"title: {title}",
        f"author: {authors}",
        f"tags: {tags}",
        f"abstract: {abst}",
        f"date_published: {date_published}",
        f"paperid: {paperid}",
        "---",
        "",
    ])
    with fs.open(datasetdir + paperid + ".md", "w") as f:
        f.write(metadata + (data or ""))
    return f"published as {paperid}"


def makepreview(x):
    """Return the markdown unchanged; feeds the live preview component."""
    return x


# NOTE(review): the nesting below was inferred from the order of the
# statements, because the indentation was lost in the source under review.
with gr.Blocks(css=basecss, js=jscode, theme='NoCrypt/miku') as demo:
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            searchbutton = gr.Button("search")
        with gr.Row():
            papercards = gr.HTML()
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is a line count, so pass an int rather than the float 1e3.
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")

    markd.change(fn=makepreview, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)

demo.launch()