varl42 committed on
Commit
a582605
1 Parent(s): 369e90f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import PyPDF2
4
+ from transformers import pipeline
5
+ from gtts import gTTS
6
+ from io import BytesIO
7
+
8
def extract_text(pdf_file):
    """Return the text extracted from the first page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (presumably a path or an open binary file object - confirm
            against the caller).

    Returns:
        The result of ``extract_text()`` on page index 0. Later pages are
        never read.
    """
    # Only page 0 is inspected; the rest of the document is ignored.
    return PyPDF2.PdfReader(pdf_file).pages[0].extract_text()
12
+
13
def _abstract_sentences(text, marker="Abstract", count=4):
    """Pick up to *count* sentences following the first one that mentions *marker*.

    The text is split on ". ". If no sentence contains the marker, or the
    marker sits in the final sentence so nothing follows it, the first
    *count* sentences of the text are returned instead, so the caller
    always has something to work with.
    """
    sentences = text.split(". ")
    for index, sentence in enumerate(sentences):
        if marker in sentence:
            following = sentences[index + 1:index + 1 + count]
            if following:
                return following
            # Marker found but nothing after it: use the fallback below.
            break
    return sentences[:count]


def summarize_text(text):
    """Summarize the abstract portion of *text* with a BART summarization model.

    The abstract is taken to be the four sentences after the first sentence
    containing the word "Abstract". When that marker is missing, the first
    four sentences of the text are summarized instead (previously this case
    crashed with an UnboundLocalError).

    Args:
        text: Plain text, e.g. the extracted first page of a paper.

    Returns:
        The ``summary_text`` string of the first summarization result.

    Raises:
        ValueError: If there is no non-blank text to summarize.
    """
    abstract = ". ".join(_abstract_sentences(text))
    if not abstract.strip():
        raise ValueError("No text available to summarize.")

    # NOTE: the pipeline is built on every call, exactly as before.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # min_length == max_length pins the requested summary length to 50.
    summary = summarizer(abstract, max_length=50, min_length=50)
    return summary[0]['summary_text']
24
+
25
def text_to_audio(text):
    """Synthesize English speech for *text* and return the encoded audio bytes.

    Args:
        text: The text to be spoken.

    Returns:
        A ``bytes`` object holding everything gTTS wrote to an in-memory
        buffer.
    """
    with BytesIO() as sink:
        gTTS(text, lang='en').write_to_fp(sink)
        # getvalue() yields the full buffer regardless of the cursor position.
        return sink.getvalue()
31
+
32
def summarize_pdf(pdf_file):
    """Produce audio of a summary for the given PDF.

    Chains the three steps in order: ``extract_text`` on the PDF,
    ``summarize_text`` on the extracted text, and ``text_to_audio`` on the
    resulting summary.

    Args:
        pdf_file: The PDF source, passed straight to ``extract_text``.

    Returns:
        Whatever ``text_to_audio`` returns for the summary.
    """
    return text_to_audio(summarize_text(extract_text(pdf_file)))
37
+
38
# Gradio components: a file input and an audio output.
inputs = gr.File()
audio_summary = gr.Audio()

# Expose summarize_pdf as a single-function Gradio interface.
iface = gr.Interface(fn=summarize_pdf, inputs=inputs, outputs=audio_summary, title="PDF Summarizer")

# Launch the interface as soon as this module is executed.
iface.launch()