Chris Finlayson
Update space name
74d23f2
raw
history blame contribute delete
908 Bytes
import PyPDF2
from transformers import pipeline
import gradio as gr
# Build the T5 summarization pipeline once, at module load, on the PyTorch
# backend; load_pdf reuses this single instance for every request.
summarizer = pipeline(
    "summarization",
    model="t5-base",
    tokenizer="t5-base",
    framework="pt",
)
def load_pdf(file, max_length, min_length, do_sample):
    """Extract the text of a PDF and return a summary of it.

    Args:
        file: The PDF to read; handed unchanged to ``PyPDF2.PdfReader``.
            ``None`` (nothing was uploaded) produces an empty result.
        max_length: Forwarded to the summarizer as ``max_length`` after
            conversion to ``int``.
        min_length: Forwarded to the summarizer as ``min_length`` after
            conversion to ``int``.
        do_sample: Forwarded to the summarizer as ``do_sample``.

    Returns:
        The ``summary_text`` field of the first summarizer result, or ``""``
        when no file was supplied or the PDF yielded no text.
    """
    # Submitting the form without an upload passes None; there is nothing
    # to read, so answer with an empty summary instead of crashing.
    if file is None:
        return ""

    pdf = PyPDF2.PdfReader(file)
    # Defensive `or ""`: a page that yields no text must not break the join.
    text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Without any text the model would have nothing real to summarize.
    if not text.strip():
        return ""

    # NOTE(review): the whole document is passed in one call; very long PDFs
    # may exceed what the model handles well -- consider chunking or
    # truncation (confirm against the pipeline documentation).
    # Slider values may arrive as floats; generation lengths must be ints.
    summary = summarizer(
        text,
        max_length=int(max_length),
        min_length=int(min_length),
        do_sample=do_sample,
    )
    return summary[0]["summary_text"]
# Input widgets, listed in the same order as load_pdf's parameters:
# file, max_length, min_length, do_sample.
inputs = [
    gr.File(label="Upload PDF"),
    gr.Slider(minimum=500, maximum=1500, value=1000, label="Max Length"),
    gr.Slider(minimum=30, maximum=500, value=300, label="Min Length"),
    gr.Checkbox(value=False, label="Do Sample"),
]

# Wire the widgets to load_pdf with a plain-text output, then start the app.
iface = gr.Interface(
    fn=load_pdf,
    inputs=inputs,
    outputs="text",
    title="PDF auto-summarise",
)
iface.launch()