|
import sys
from pathlib import Path

# Directory containing this file; used to locate utils.py and style.css.
HERE = Path(__file__).parent

# sys.path entries must be plain strings: the import system skips any other
# type, so appending the Path object itself would not make ``utils`` importable.
sys.path.append(str(HERE))

from utils import GParse_Paper, Get_Bibliography
from bs4 import BeautifulSoup
import solara
from solara.components.file_drop import FileInfo

# Stylesheet text, read once at import time from style.css next to this file.
app_style = (HERE / "style.css").read_text()
|
|
|
|
|
def Get_HTMLTop(title):
    """Return the HTML header fragment placed at the top of the article.

    Args:
        title: Document title as plain text. It is HTML-escaped before being
            inserted, so characters such as ``<`` or ``&`` in a title cannot
            break or inject markup.

    Returns:
        A string holding the ``<h1>`` title followed by an (empty)
        ``schema:Person`` span.
    """
    # Imported locally: the module-level name ``html`` is used for app state.
    from html import escape

    safe_title = escape(str(title))
    html_top = f"""
    <h1>{safe_title}</h1>
    <span typeof="schema:Person" resource="http://orcid.org/0000-0003-1279-3709">

    </span>
    """
    return html_top
|
|
|
def Get_Controls():
    """Return the static HTML/JavaScript for the reader controls.

    The fragment contains a text-size ``<select>`` (wired to
    ``adjustTextSize``) and the ``openDialog`` / ``closeDialog`` helpers that
    the citation spans and dialogs call.

    Returns:
        The markup as a single string.
    """
    # NOTE(review): openDialog registers its outside-click listener with
    # ``once: true`` while the opening click is still being dispatched; confirm
    # that the listener is not consumed by that same click.
    controls = """
    <label for="textSize">Text Size: </label>
    <select id="textSize" name="textSize" onchange="adjustTextSize(this.value)">
        <option value="10">10px</option>
        <option value="12">12px</option>
        <option value="14">14px</option>
        <option value="16" selected>16px</option>
        <option value="18">18px</option>
        <option value="20">20px</option>
        <option value="24">24px</option>
        <option value="28">28px</option>
        <option value="32">32px</option>
        <option value="36">36px</option>
        <option value="40">40px</option>
        <option value="44">44px</option>
        <option value="48">48px</option>
        <option value="50">50px</option>
    </select>
    <script>
        function adjustTextSize(size) {
            const baseSize = parseInt(size);
            document.body.style.fontSize = baseSize + 'px';
        }
    </script>

    <script>
        function openDialog(event, dialogId) {
            var dialog = document.getElementById(dialogId);
            var rect = event.target.getBoundingClientRect();
            dialog.style.top = rect.top + window.scrollY + 'px';
            dialog.style.left = rect.left + window.scrollX + 'px';
            dialog.style.display = 'block';

            // Add an event listener to close the dialog when clicking outside of it
            document.addEventListener('click', function(event) {
                var isClickInside = dialog.contains(event.target);
                var isClickOnText = event.target.classList.contains('text-area');

                if (!isClickInside && !isClickOnText) {
                    closeDialog(dialogId);
                }
            }, { once: true });
        }

        function closeDialog(dialogId) {
            document.getElementById(dialogId).style.display = 'none';
        }
    </script>
    """
    # The original returned the undefined name ``Controls`` (NameError).
    return controls
|
|
|
|
|
def _render_citation_dialog(ref_id, label, cit_info):
    """Return the ``<div class="dialog">`` markup for one bibliography entry.

    Args:
        ref_id: Bibliography key; reused as the dialog's DOM id and passed to
            ``closeDialog`` by the Close button.
        label: In-text citation label shown as the dialog heading.
        cit_info: Mapping providing ``title``, ``authors`` (iterable of
            strings), ``year``, ``journal`` and ``doi``.
    """
    from html import escape

    safe_id = escape(str(ref_id))
    safe_label = escape(str(label))
    title = escape(str(cit_info['title']))
    authors = escape(", ".join(cit_info['authors']))
    year = escape(str(cit_info['year']))
    journal = escape(str(cit_info['journal']))
    doi = escape(str(cit_info['doi']))
    return f"""<div id="{safe_id}" class="dialog">
    <b>{safe_label}</b><br>
    <b>Title:</b> {title}<br>
    <b>Authors:</b> {authors}<br>
    <b>Year:</b> {year}<br>
    <b>Journal:</b> {journal}<br>
    <b>DOI:</b> <a href="https://doi.org/{doi}">{doi} </a><br>
    <button class="close-button" onclick="closeDialog('{safe_id}')">Close</button>
</div>"""


def _build_sections(soup):
    """Walk every ``<div>`` of the parsed article and render it as HTML.

    Each ``<div>`` becomes a ``<section>``; a ``<head>`` child, when present,
    supplies the section number (its ``n`` attribute) and heading text. Inside
    paragraphs, ``<ref target="#id">`` elements whose id is a key of the
    bibliography become clickable spans, and one dialog is produced per cited
    entry.

    Args:
        soup: Parsed article document (BeautifulSoup object).

    Returns:
        A tuple ``(sections_list, sections_content, citation_modals)``:
        ``sections_list`` is a list of ``{'num', 'text'}`` dicts for the
        navigation, ``sections_content`` is the sections' HTML, and
        ``citation_modals`` is a list of dialog ``<div>`` strings.
    """
    from html import escape
    # Neither class was imported at module level in the original file.
    from bs4 import NavigableString, Tag

    bib = Get_Bibliography(soup)
    sections_list = []
    parts = []
    # Keyed by ref_id so a work cited several times yields a single dialog
    # (duplicate DOM ids would make getElementById ambiguous).
    dialogs = {}

    for div in soup.find_all("div"):
        header = div.find("head")
        if header is None:
            parts.append("<section id=''>")
        else:
            section_number = header.get('n', "")
            section_id = header.text.replace(" ", "_")
            # NOTE(review): normalize_section is neither defined nor imported
            # in the visible part of this file - confirm where it comes from.
            sections_list.append({'num': normalize_section(section_number), 'text': section_id})
            parts.append(f"<section id='{escape(section_id)}'>")
            parts.append(f"<h2>{escape(section_number)} {escape(header.text)}</h2>")

        for paragraph in div.find_all("p"):
            pieces = []
            for element in paragraph.contents:
                if isinstance(element, NavigableString):
                    pieces.append(escape(str(element)))
                    continue

                ref_id = None
                if isinstance(element, Tag) and element.name == "ref":
                    target = element.get("target")
                    if target is not None:
                        ref_id = target.lstrip("#")

                if ref_id is not None and ref_id in bib:
                    safe_id = escape(ref_id)
                    pieces.append(
                        f"""<span class="text-area" onclick="openDialog(event, '{safe_id}')">{escape(element.text)}</span>"""
                    )
                    if ref_id not in dialogs:
                        dialogs[ref_id] = _render_citation_dialog(ref_id, element.text, bib[ref_id])
                else:
                    # Any other tag (or an unresolvable reference) keeps its text.
                    pieces.append(escape(element.text))

            parts.append("<p>" + "".join(pieces) + "</p>")

        parts.append("</section>")

    return sections_list, "".join(parts), list(dialogs.values())


def Get_Sections(soup):
    """Render the article sections as HTML.

    Args:
        soup: Parsed article document (BeautifulSoup object).

    Returns:
        ``(sections_list, sections_content)`` - the navigation entries and the
        concatenated ``<section>`` markup. The citation dialogs built along
        the way are available from ``_build_sections``.
    """
    sections_list, sections_content, _ = _build_sections(soup)
    return sections_list, sections_content


def Get_Navigation(controls, sections_list=None):
    """Return the sticky navigation panel markup.

    Args:
        controls: HTML fragment placed at the top of the panel.
        sections_list: Iterable of ``{'num': str, 'text': str}`` dicts, one
            per section. ``None`` (the default) produces a panel without links.

    Returns:
        The panel as an HTML string. Each entry links to ``#<text>`` and is
        indented 20px per ``.`` found in its ``num``.
    """
    from html import escape

    parts = [
        "<div class='sticky-content' style='max-height: 100%; overflow-y: auto;'>",
        controls,
        " <h2> Navigation </h2>",
    ]
    for section in sections_list or ():
        number = section['num']
        anchor = section['text']
        left = f"{20 * number.count('.')}px"
        parts.append(
            f'<p style="margin-left: {left}; font-size: 10px;">'
            f'<a href="#{escape(anchor)}">{escape(number)} {escape(anchor)}</a></p>'
        )
    parts.append("</div>")
    # The original built the string but never returned it.
    return "".join(parts)


def Get_Article_HTML(pdf):
    """Convert an article PDF into a single HTML document string.

    The result is also written to ``article_demo.html`` in the current
    working directory.

    Args:
        pdf: PDF content, passed unchanged to ``GParse_Paper``.

    Returns:
        The complete HTML string (stylesheet, title block, sections,
        navigation panel and citation dialogs).
    """
    # GParse_Paper's output is parsed as XML below.
    article = GParse_Paper(pdf)
    soup = BeautifulSoup(article, "xml")

    # The original called Get_Article_HTML(soup) here, recursing without end.

    try:
        document_title = soup.find("fileDesc").find("title").text
    except AttributeError:
        # find() returned None: no <fileDesc> or no <title> in the document.
        document_title = ""

    html_top = Get_HTMLTop(document_title)
    sections_list, sections_content, citation_modals = _build_sections(soup)
    controls = Get_Controls()
    navigation = Get_Navigation(controls, sections_list)

    # ``style`` was undefined in the original; inline the stylesheet text
    # loaded into app_style so the saved file carries its own styling.
    style = "<style>" + app_style + "</style>"
    html = (
        style
        + "<body><article>"
        + html_top
        + sections_content
        + navigation
        + "\n".join(citation_modals)
        + "</article></body>"
    )

    # Explicit encoding: article text routinely contains non-ASCII characters.
    with open("article_demo.html", "w", encoding="utf-8") as f:
        f.write(html)

    return html
|
|
|
|
|
# Reactive HTML shown under the drop zone; starts as a placeholder heading and
# is replaced with the converted article after an upload.
html = solara.reactive("<h1> Article PDF to HTML converter </h1>")


@solara.component
def Page():
    """Render the app: stylesheet, PDF drop zone, and the current HTML."""

    def handle_upload(file_info: FileInfo):
        # Read the uploaded file's content and publish the converted article.
        pdf_bytes = file_info["file_obj"].read()
        html.value = Get_Article_HTML(pdf_bytes)

    solara.Style(app_style)
    solara.FileDrop(
        label="Drag and drop article pdf",
        on_file=handle_upload,
        lazy=True,
    )
    solara.HTML(unsafe_innerHTML=html.value)
|
|
|
|