sdg-policy-tracing / analyse_site.py
jonas's picture
Update analyse_site.py
cf42e81
raw
history blame
1.31 kB
import streamlit as st
import glob, os, sys; sys.path.append('/src')
#import helper
from src import preprocessing as pre
from src import cleaning as clean
def app():
    """Render the 'Analyse Policy Document' page.

    Sets the sidebar title, then draws a centred page heading and a file
    uploader (pdf / docx / txt) inside a container. When a file has been
    supplied, its name is echoed and the file is passed to
    ``pre.load_document`` followed by ``clean.preprocessing``. When no file
    is present, two spacer lines and a centred placeholder notice are shown.
    """
    heading_html = "<h1 style='text-align: center; color: black;'>SDSN X GIZ Policy Tracing</h1>"
    placeholder_html = "<h3 style='text-align: center; color: black;'>no PDF uploaded ...</h3>"

    # Sidebar
    st.sidebar.title('Analyse Policy Document')

    # Main page content
    with st.container():
        st.markdown(heading_html, unsafe_allow_html=True)

        file = st.file_uploader('Upload PDF File', type=['pdf', 'docx', 'txt'])

        # Nothing uploaded yet: show the placeholder and stop here.
        if file is None:
            for _ in range(2):
                st.write(' ')
            st.markdown(placeholder_html, unsafe_allow_html=True)
            return

        st.write("Filename: ", file.name)

        # Load the uploaded file, then run it through preprocessing.
        loaded_docs = pre.load_document(file)

        # The three results are unpacked but not used further in this function.
        _processed_docs, _frame, _full_text = clean.preprocessing(loaded_docs)

        st.write('... ')