"""Gradio app that summarizes the abstract on a PDF's first page and reads it aloud."""

import functools
from io import BytesIO

import gradio as gr
import numpy
import PyPDF2
import scipy
import torch
from gtts import gTTS
from transformers import BartTokenizer, pipeline

_MODEL_NAME = "facebook/bart-large-cnn"
# Marker searched for (case-sensitively) to locate the abstract.
_ABSTRACT_MARKER = "Abstract"
# Number of sentences taken after the sentence that contains the marker.
_ABSTRACT_SENTENCE_COUNT = 4
# Both bounds are equal, pinning the requested summary length in tokens.
_SUMMARY_MIN_LENGTH = 30
_SUMMARY_MAX_LENGTH = 30


def extract_text(pdf_file):
    """Return the extracted text of the first page of *pdf_file*.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts; here it is the value
            delivered by the ``gr.File`` input component.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    if len(reader.pages) == 0:
        raise ValueError("The PDF has no pages to read.")
    return reader.pages[0].extract_text()


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it across requests."""
    tokenizer = BartTokenizer.from_pretrained(_MODEL_NAME)
    return pipeline("summarization", model=_MODEL_NAME, tokenizer=tokenizer)


def _select_abstract(text):
    """Return the sentences that follow the first one containing the marker.

    The sentence containing the marker itself is skipped. When the marker is
    absent, the leading sentences of *text* are used instead so that the
    caller still gets something to summarize.
    """
    sentences = text.split(". ")
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if _ABSTRACT_MARKER in sentence),
        0,
    )
    return ". ".join(sentences[start:start + _ABSTRACT_SENTENCE_COUNT])


def summarize_text(text):
    """Summarize the abstract found in *text*.

    Args:
        text: Plain text of a page, expected to contain the word "Abstract".

    Returns:
        The summary string produced by the summarization pipeline.

    Raises:
        ValueError: If no text is available to summarize (empty page, or
            nothing follows the sentence containing the marker).
    """
    abstract = _select_abstract(text or "")
    if not abstract.strip():
        raise ValueError("No abstract text could be found to summarize.")

    summary = _get_summarizer()(
        abstract,
        max_length=_SUMMARY_MAX_LENGTH,
        min_length=_SUMMARY_MIN_LENGTH,
        do_sample=False,
    )
    return summary[0]['summary_text']


def text_to_audio(text):
    """Convert *text* to English speech with gTTS and return the audio bytes.

    NOTE(review): the bytes are handed straight to ``gr.Audio``; confirm the
    installed Gradio version accepts raw bytes as an Audio output value.
    """
    tts = gTTS(text, lang='en')
    buffer = BytesIO()
    tts.write_to_fp(buffer)
    return buffer.getvalue()


def audio_pdf(pdf_file):
    """Return ``(summary, audio)`` for the abstract on the PDF's first page."""
    text = extract_text(pdf_file)
    summary = summarize_text(text)
    audio = text_to_audio(summary)
    return summary, audio


inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="PDF Audio Summarizer 📻",
    description="App that converts an abstract into audio",
    examples=[
        "Attention_is_all_you_need.pdf",
        "ImageNet_Classification.pdf",
    ],
)

# Only start the server when run as a script, so importing this module
# (e.g. to reuse the helpers) does not launch the web UI.
if __name__ == "__main__":
    iface.launch()