# Source: Hugging Face Spaces app (Space status at capture time: Running)
import gradio as gr | |
import torch | |
import PyPDF2 | |
from transformers import pipeline | |
import numpy | |
import scipy | |
from gtts import gTTS | |
from io import BytesIO | |
from transformers import BartTokenizer | |
def extract_text(pdf_file):
    """Return the text of the first page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; here
            it is the value supplied by the ``gr.File`` input component.

    Returns:
        The result of ``extract_text()`` on page index 0. Later pages are
        never read.
    """
    first_page = PyPDF2.PdfReader(pdf_file).pages[0]
    return first_page.extract_text()
# Checkpoint used for both the tokenizer and the summarization pipeline.
_SUMMARY_MODEL = "facebook/bart-large-cnn"
# How many sentences are taken after the one that mentions "Abstract".
_ABSTRACT_SENTENCE_COUNT = 4
# Lazily-built pipeline, kept at module level so the model loads only once.
_summarizer = None


def _find_abstract(text):
    """Return the sentences that follow the first mention of "Abstract".

    Sentences are approximated by splitting on ". ". The sentence that
    contains the (case-sensitive) word "Abstract" is itself skipped and the
    next ``_ABSTRACT_SENTENCE_COUNT`` sentences are joined back together.

    Raises:
        ValueError: If no sentence contains "Abstract", or nothing follows it.
    """
    sentences = text.split(". ")
    for index, sentence in enumerate(sentences):
        if "Abstract" in sentence:
            start = index + 1
            break
    else:
        # Previously this fell through with `start` unbound and crashed with
        # an UnboundLocalError; fail with an explanatory error instead.
        raise ValueError('No "Abstract" section found in the supplied text.')

    abstract = ". ".join(sentences[start:start + _ABSTRACT_SENTENCE_COUNT])
    if not abstract.strip():
        # The marker was in the last sentence, so there is nothing to summarize.
        raise ValueError('No text follows the "Abstract" marker.')
    return abstract


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        tokenizer = BartTokenizer.from_pretrained(_SUMMARY_MODEL)
        _summarizer = pipeline(
            "summarization", model=_SUMMARY_MODEL, tokenizer=tokenizer
        )
    return _summarizer


def summarize_text(text):
    """Summarize the abstract found in ``text``.

    The abstract is located first, so malformed input is rejected before the
    (expensive) model is loaded.

    Args:
        text: Plain text expected to contain the word "Abstract" followed by
            the abstract's sentences.

    Returns:
        The ``summary_text`` string of the first pipeline result.

    Raises:
        ValueError: If no abstract can be located in ``text``.
    """
    abstract = _find_abstract(text)
    summarizer = _get_summarizer()
    # min_length == max_length is kept from the original call; do_sample=False
    # disables sampling during generation.
    summary = summarizer(abstract, max_length=50, min_length=50,
                         do_sample=False)
    return summary[0]['summary_text']
def text_to_audio(text):
    """Synthesize English speech for ``text`` and return it as raw bytes.

    Args:
        text: The text handed to ``gTTS`` (language fixed to ``'en'``).

    Returns:
        Everything ``gTTS.write_to_fp`` wrote into an in-memory buffer.
    """
    speech = gTTS(text, lang='en')
    with BytesIO() as audio_buffer:
        speech.write_to_fp(audio_buffer)
        # getvalue() returns the whole buffer regardless of stream position.
        return audio_buffer.getvalue()
def audio_pdf(pdf_file):
    """Run the full pipeline: PDF -> text -> summary -> audio.

    Args:
        pdf_file: The uploaded PDF, forwarded unchanged to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary string and the value
        ``text_to_audio`` produced for that summary.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)
# --- Gradio UI wiring ---
# One file input; two outputs that receive audio_pdf's (summary, audio) tuple
# in order.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="PDF Audio Summarizer 📻",
    description="App that converts an abstract into audio",
    # Example inputs offered in the UI; presumably these PDFs ship alongside
    # the app -- verify they exist in the deployment.
    examples=["Attention_is_all_you_need.pdf", "ImageNet_Classification.pdf"],
)

# Start the app as soon as the module is executed.
iface.launch()