Spaces:

NBayer
/

Streamlit_app_paper

Runtime error

App Files Files Community

NBayer committed on Mar 12, 2023

Commit

c42ad4e

•

1 Parent(s): e1724bf

Upload main.py

Browse files

Files changed (1) hide show

main.py +67 -0

main.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import streamlit as st
+from streamlit.components.v1 import html
+import os
+import PyPDF2
+def get_pdf_text(pdf_path):
+    # creating a pdf file object
+    pdfFileObj = open(pdf_path, 'rb')
+    # creating a pdf reader object
+    pdf_reader = PyPDF2.PdfReader(pdfFileObj)
+    # extract text
+    total_text_list = []
+    for i in range(len(pdf_reader.pages)):
+        page_text = pdf_reader.pages[i].extract_text()
+        total_text_list.append(page_text)
+    pdf_text = " ".join(total_text_list)
+    pdfFileObj.close()
+    return pdf_text
+tab_general_topics, tab_your_paper = st.tabs(["Research topics", "Summarize your paper(s)"])
+with tab_general_topics:
+    html("", height=10)
+    st.header("See the status of a research topic through a summary of the most cited papers")
+    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])
+with tab_your_paper:
+    html("", height=10)
+    st.markdown("""
+### Simply upload one or multiple PDFs and we summarize the content for you!
+    """)
+    pdf_files = st.file_uploader("Upload your paper as a pdf", type=[".pdf"], accept_multiple_files=True, help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.")
+    if pdf_files:
+        recently_added = []
+        for pdf in pdf_files:
+            # Saving the files
+            pdf_data = pdf.getvalue()
+            pdf_path = os.path.join("pdfs", pdf.name)
+            with open(pdf_path, "wb") as f:
+                f.write(pdf_data)
+                recently_added.append(pdf_path)
+        pdfs_content_list = []
+        print("*****", recently_added)
+        for recent_pdf in recently_added:
+            # Reading the pdf files
+            pdf_content = get_pdf_text(recent_pdf)
+            print("**", pdf_content)
+            pdfs_content_list.append(pdf_content)
+            # Delete the files
+            os.remove(recent_pdf)
+        print("************************", len(pdfs_content_list))
+        print(pdfs_content_list[0][:20], pdfs_content_list[1][:20])
+        all_text_together = " ".join(pdfs_content_list)
+        st.write(all_text_together)