Spaces:
Runtime error
Runtime error
import PyPDF2 | |
import streamlit as st | |
from dotenv import load_dotenv | |
from transformers import pipeline | |
def retrieve_pdf_text(pdf_file):
    """Return the text of every page in *pdf_file*, concatenated in page order.

    Args:
        pdf_file: A path or binary file-like object that ``PyPDF2.PdfReader``
            can open (in this app, the object returned by the file uploader).

    Returns:
        A single string made of each page's extracted text, with no separator
        added between pages. Empty if no page yields any text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Build the result with one join rather than repeated `+=`, which can
    # degrade to quadratic time on large documents. The `or ""` is a defensive
    # guard: a page that yields no value contributes nothing instead of
    # raising a TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def main():
    """Render the Streamlit page that summarizes an uploaded PDF.

    The page shows a file uploader and a slider for the maximum summary
    length. Once a PDF is uploaded and "Run" is pressed, the text extracted
    from the PDF is passed to a Hugging Face summarization pipeline and the
    resulting summary is written to the page.
    """
    load_dotenv()
    st.set_page_config(page_title='Document Summarizer', page_icon=':books:')
    st.header("Summarize a PDF")

    hf_name = "pszemraj/led-base-book-summary"
    # Shortest summary requested from the model. The slider's lower bound is
    # tied to this value so the user cannot choose a max_length below
    # min_length (or zero), which is an invalid generation constraint.
    min_summary_length = 8

    pdf_file = st.file_uploader("Upload a PDF file with text", type=["pdf"])
    length = st.slider('Max summary length', min_summary_length, 3000, 1000)

    # Nothing to do until a PDF file is uploaded.
    if not pdf_file:
        return

    raw_text = retrieve_pdf_text(pdf_file)

    if not st.button("Run"):
        return

    # A PDF with no extractable text (e.g. scanned images) would otherwise
    # send an empty string to the model; report it instead.
    if not raw_text.strip():
        st.warning("No extractable text was found in this PDF.")
        return

    with st.spinner("Summarizing.."):
        summarizer = pipeline("summarization", hf_name)
        result = summarizer(
            raw_text,
            min_length=min_summary_length,
            max_length=length,
            no_repeat_ngram_size=3,
            encoder_no_repeat_ngram_size=3,
            repetition_penalty=3.5,
            num_beams=4,
            do_sample=False,
            early_stopping=True,
        )
        st.write(result[0]["summary_text"])
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()