Chris Finlayson committed on
Commit
1c3b33f
1 Parent(s): 72d404b

Add pytorch

Browse files
Files changed (2) hide show
  1. application.py +31 -0
  2. requirements.txt +2 -1
application.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ from transformers import pipeline
3
+ import gradio as gr
4
+
5
# Build the T5 summarization pipeline once, when the module is loaded,
# so every request reuses the same model instance.
summarizer = pipeline(
    "summarization",
    model="t5-base",
    tokenizer="t5-base",
    framework="pt",
)
7
+
8
def load_pdf(file, max_length, min_length, do_sample):
    """Extract the text of an uploaded PDF and return a summary of it.

    Args:
        file: The uploaded PDF; handed unchanged to ``PyPDF2.PdfReader``.
        max_length: Upper bound on the summary length, forwarded to the
            summarization pipeline.
        min_length: Lower bound on the summary length, forwarded to the
            summarization pipeline.
        do_sample: Forwarded to the pipeline's ``do_sample`` option.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty
        string when no text could be extracted from the PDF.
    """
    pdf = PyPDF2.PdfReader(file)

    # A page may yield no text (e.g. scanned images); ``or ""`` keeps the
    # join safe whether that comes back as None or as an empty string.
    text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Nothing to summarize: skip the model call entirely.
    if not text.strip():
        return ""

    # The length bounds come from UI sliders and may arrive as floats;
    # cast them so the pipeline receives integer lengths.
    summary = summarizer(
        text,
        max_length=int(max_length),
        min_length=int(min_length),
        do_sample=do_sample,
    )
    return summary[0]["summary_text"]
19
+
20
+
21
# Input widgets, listed in the same order as load_pdf's parameters.
inputs = [
    gr.File(label="Upload PDF"),
    gr.Slider(minimum=500, maximum=1500, value=1000, label="Max Length"),
    gr.Slider(minimum=30, maximum=500, value=300, label="Min Length"),
    gr.Checkbox(value=False, label="Do Sample"),
]

# Wire the widgets to load_pdf with a plain-text output, then start the app.
iface = gr.Interface(
    fn=load_pdf,
    inputs=inputs,
    outputs="text",
    title="PDF summariser",
)
iface.launch()
30
+
31
+
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  pypdf2
2
- transformers
 
 
1
  pypdf2
2
+ transformers
3
+ torch