File size: 1,623 Bytes
a582605
2753e83
a582605
 
2753e83
 
a582605
 
5f6f8b5
a582605
 
 
1647f6a
a582605
 
5f6f8b5
a582605
 
 
 
 
 
 
 
5f6f8b5
 
 
8c41423
37c818f
a582605
 
 
 
 
 
 
 
 
4c3923d
a582605
 
 
5e46dcf
a582605
 
37c818f
5e46dcf
 
a582605
 
4c3923d
a582605
b8e363b
9ce669c
2d4f77c
c2e2b8a
 
 
a582605
 
4c3923d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import functools
from io import BytesIO

import gradio as gr
import numpy
import PyPDF2
import scipy
import torch
from gtts import gTTS
from transformers import BartTokenizer
from transformers import pipeline

def extract_text(pdf_file):
    """Return the text of the first page of a PDF.

    ``pdf_file`` is passed straight to ``PyPDF2.PdfReader``. Only the page at
    index 0 is read; any later pages are ignored.
    """
    first_page = PyPDF2.PdfReader(pdf_file).pages[0]
    return first_page.extract_text()


@functools.lru_cache(maxsize=None)
def _load_summarizer():
    """Build the BART summarization pipeline once and reuse it on later calls."""
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    return pipeline("summarization", model="facebook/bart-large-cnn", tokenizer=tokenizer)


def _select_abstract(text, num_sentences=4):
    """Return up to ``num_sentences`` sentences that follow the "Abstract" marker."""
    sentences = text.split(". ")
    # The sentence containing "Abstract" is skipped and the excerpt starts
    # right after it. When no sentence contains the marker, start at index 0
    # so the excerpt is taken from the beginning of the text instead of
    # failing on unassigned slice bounds.
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if "Abstract" in sentence),
        0,
    )
    return ". ".join(sentences[start:start + num_sentences])


def summarize_text(text):
    """Summarize the abstract found in ``text``.

    The text is split on ". " and the four sentences after the first sentence
    containing "Abstract" are summarized with ``facebook/bart-large-cnn``. If
    no sentence contains "Abstract", the first four sentences are used.

    Args:
        text: Text to search for an abstract.

    Returns:
        The ``summary_text`` field of the first result from the pipeline.

    Raises:
        ValueError: If the selected excerpt is empty or only whitespace.
    """
    abstract = _select_abstract(text)
    # Validate before loading the model so blank input fails fast and clearly.
    if not abstract.strip():
        raise ValueError("No text found to summarize")

    summarizer = _load_summarizer()
    # min_length and max_length are both 30, as in the original call.
    summary = summarizer(abstract, max_length=30, min_length=30,
                         do_sample=False)
    return summary[0]['summary_text']

def text_to_audio(text):
    """Convert ``text`` to English speech with gTTS and return the raw bytes.

    The audio is written to an in-memory buffer rather than a file on disk,
    and the buffer's full contents are returned.
    """
    speech = gTTS(text, lang='en')
    with BytesIO() as stream:
        speech.write_to_fp(stream)
        # getvalue() returns everything written, regardless of stream position.
        return stream.getvalue()

def audio_pdf(pdf_file):
    """Run the full pipeline for one PDF: extract text, summarize, synthesize.

    Returns a ``(summary, audio)`` pair, where ``summary`` is the text from
    ``summarize_text`` and ``audio`` is the result of ``text_to_audio`` on it.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)

# Gradio components: one uploaded file in, summary text plus audio out.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Example PDFs offered in the UI. The paths are relative, so the files are
# presumably shipped alongside this script -- TODO confirm.
example_pdfs = [
    "Attention_is_all_you_need.pdf",
    "ImageNet_Classification.pdf",
]

# Wire audio_pdf to the components: its (summary, audio) return pair maps onto
# the two outputs in order.
iface = gr.Interface(
    audio_pdf,
    inputs,
    [summary_text, audio_summary],
    title="PDF Audio Summarizer 📻",
    description="App that converts an abstract into audio",
    examples=example_pdfs,
)

iface.launch()