"""Streamlit app: upload a PDF and display an abstractive T5 summary of it."""

import os

import PyPDF2
import streamlit as st
from PIL import Image
from transformers import T5ForConditionalGeneration, T5TokenizerFast


@st.cache_resource
def _load_model():
    """Load the t5-base model and tokenizer once per server process.

    Streamlit re-executes the whole script on every user interaction;
    caching keeps the weights from being re-loaded on each rerun.
    """
    return (
        T5ForConditionalGeneration.from_pretrained("t5-base"),
        T5TokenizerFast.from_pretrained("t5-base"),
    )


model, tokenizer = _load_model()


def read_pdf(pdf) -> str:
    """Return the concatenated text of every page in *pdf*.

    Args:
        pdf: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (e.g. a Streamlit uploaded file).

    Returns:
        The extracted text of all pages joined together; pages that yield
        no text contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf)
    # `or ""` guards against a page whose extraction yields nothing, which
    # would otherwise break the string concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)


def summarizer(text: str) -> str:
    """Generate an abstractive summary of *text* with the t5-base model.

    Args:
        text: The document text. Input beyond 1000 tokens is truncated.

    Returns:
        The decoded summary, without tokenizer special tokens.
    """
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=1000,
        truncation=True,
    )
    outputs = model.generate(
        inputs,
        max_length=1000,
        min_length=100,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Strip markers such as <pad> and </s> so they never reach the UI.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


st.title(':blue[Abstractive Summarizer]')
st.header('by: _Team_ _Rare_ _species_')

uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")

if uploaded_file is not None and st.button('Summarize Document'):
    summary = None
    with st.spinner("📚    Please wait while we produce a summary..."):
        text = read_pdf(uploaded_file)
        # Only run the model when there is something to summarize.
        if text.strip():
            summary = summarizer(text)

    if summary is None:
        st.warning('No extractable text was found in this PDF.')
    else:
        st.divider()
        # The summary derives from an untrusted upload, so it is rendered
        # as plain Markdown rather than raw HTML.
        st.markdown(summary)
        st.divider()