"""Streamlit page with two tabs: a research-topic picker and a PDF uploader.

The upload tab extracts the text of every uploaded PDF with PyPDF2 and writes
the concatenated text to the page.
"""
import io
import os

import PyPDF2
import streamlit as st
from streamlit.components.v1 import html

# Show the value of the TEST environment variable (None when it is unset).
# os.environ is a mapping, not a callable, so it has to be read with .get().
st.write(os.environ.get("TEST"))


def _extract_text(pdf_stream):
    """Return the text of all pages of a binary PDF stream, joined by spaces."""
    pdf_reader = PyPDF2.PdfReader(pdf_stream)
    # `or ""` keeps the join safe should a page yield no text object.
    return " ".join(page.extract_text() or "" for page in pdf_reader.pages)


def get_pdf_text(pdf_path):
    """Extract the text of a PDF.

    Args:
        pdf_path: Path of a PDF file on disk, or an already opened binary
            file-like object (anything exposing ``read``) holding PDF data.

    Returns:
        The extracted text of every page, joined with single spaces.
    """
    # In-memory streams are parsed directly; the caller owns their lifetime.
    if hasattr(pdf_path, "read"):
        return _extract_text(pdf_path)
    # The context manager closes the file even when parsing raises.
    with open(pdf_path, "rb") as pdf_file:
        return _extract_text(pdf_file)


tab_general_topics, tab_your_paper = st.tabs(
    ["Research topics", "Summarize your paper(s)"]
)

with tab_general_topics:
    # Empty HTML component used as a small vertical spacer.
    html("", height=10)
    st.header(
        "See the status of a research topic through a summary of the most cited papers"
    )
    st.selectbox(
        "Select a research topic",
        ["Artificial Intelligence", "Sustainability", "Cooking"],
    )

with tab_your_paper:
    # Empty HTML component used as a small vertical spacer.
    html("", height=10)
    st.markdown("""
    ### Simply upload one or multiple PDFs and we summarize the content for you!
    """)
    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )
    if pdf_files:
        # Parse the uploads straight from memory. Writing them to the working
        # directory under the client-supplied file name made two uploads with
        # the same name clobber each other and left files behind on errors.
        pdfs_content_list = [
            get_pdf_text(io.BytesIO(pdf.getvalue())) for pdf in pdf_files
        ]
        all_text_together = " ".join(pdfs_content_list)
        st.write(all_text_together)