_demo42 / app.py
varl42's picture
Update app.py
c2e2b8a
raw
history blame
No virus
1.48 kB
import gradio as gr
import torch
import PyPDF2
from transformers import pipeline
import numpy
import scipy
from gtts import gTTS
from io import BytesIO
def extract_text(pdf_file):
    """Return the text extracted from the first page of a PDF.

    Only page index 0 is read; later pages are ignored.

    Args:
        pdf_file: The PDF source handed to ``PyPDF2.PdfReader``
            (presumably a path or binary file object -- confirm against
            what the Gradio file input supplies).

    Returns:
        Whatever ``extract_text()`` yields for the first page.
    """
    first_page = PyPDF2.PdfReader(pdf_file).pages[0]
    return first_page.extract_text()
# Number of sentences taken after the one that mentions "Abstract".
_ABSTRACT_SENTENCES = 4

# Lazily created summarization pipeline, shared by every call so the model
# is only loaded once per process instead of once per request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def _select_abstract(text):
    """Return the passage of *text* that should be summarized.

    The text is split on ". " and the sentences following the first one that
    contains "Abstract" are kept.  When no such sentence exists, or nothing
    follows it, the leading sentences of the text are used instead.
    """
    sentences = text.split(". ")
    # NOTE(review): the sentence containing the marker itself is skipped, so
    # any abstract text sharing that split piece with the heading is left
    # out.  Kept as-is to preserve the existing selection window.
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if "Abstract" in sentence),
        0,
    )
    selected = sentences[start:start + _ABSTRACT_SENTENCES]
    if not selected:
        # Marker was in the final sentence: fall back to the start of the text.
        selected = sentences[:_ABSTRACT_SENTENCES]
    return ". ".join(selected)


def summarize_text(text):
    """Summarize the abstract found in *text*.

    Args:
        text: Plain text of a document page.

    Returns:
        The ``summary_text`` of the first result produced by the
        "facebook/bart-large-cnn" summarization pipeline.

    Raises:
        ValueError: If there is no non-whitespace text to summarize.
    """
    abstract = _select_abstract(text)
    if not abstract.strip():
        raise ValueError("No text available to summarize")
    summarizer = _get_summarizer()
    summary = summarizer(abstract, max_length=50, min_length=30,
                         do_sample=False)
    return summary[0]['summary_text']
def text_to_audio(text):
    """Convert *text* to spoken English audio and return it as raw bytes.

    Args:
        text: The text to be spoken.

    Returns:
        The bytes that gTTS wrote to an in-memory buffer
        (presumably MP3-encoded -- confirm against the gTTS docs).
    """
    speech = gTTS(text, lang='en')
    with BytesIO() as stream:
        speech.write_to_fp(stream)
        # getvalue() returns the whole buffer regardless of the cursor.
        return stream.getvalue()
def audio_pdf(pdf_file):
    """Run the full pipeline: PDF -> first-page text -> summary -> audio.

    Args:
        pdf_file: The uploaded PDF, forwarded unchanged to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary string and the result of
        ``text_to_audio`` for that summary.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)
# UI components: one file input, and a text box plus audio player as outputs.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Wire audio_pdf into a Gradio interface; its (summary, audio) return value
# maps positionally onto the two output components.
iface = gr.Interface(
    title="PDF Audio Summarizer ",
    description="App to turn an abstract into audio",
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    # Example inputs offered in the UI (paths presumably relative to this
    # script -- confirm the files ship alongside it).
    examples=[
        "Attention_is_all_you_need.pdf",
        "ImageNet_Classification.pdf",
    ],
)

# Start the web app as soon as the script runs.
iface.launch()