MusIre's picture
Update app.py
ca189d8
raw
history blame
2.4 kB
import subprocess
# Install the third-party packages used by this script by invoking pip at
# start-up, before the imports that depend on them run.
# NOTE(review): the CompletedProcess returned by run() is discarded and
# check=True is not passed, so a failed install is not detected here.
subprocess.run(["pip", "install", "PyPDF2", "transformers", "bark", "gradio","soundfile","PyMuPDF","numpy"])
import PyPDF2
from transformers import pipeline
from bark import SAMPLE_RATE, generate_audio, preload_models
import gradio as gr
import os
import io
import fitz
import tempfile
from PyPDF2 import PdfReader
import numpy as np
from tempfile import NamedTemporaryFile
import soundfile as sf
def readPDF(pdf_file_path):
    """Return the raw bytes of the PDF file at ``pdf_file_path``.

    Args:
        pdf_file_path: Path of the file to read, as a string.

    Returns:
        The complete file content as ``bytes``.

    Raises:
        ValueError: If the path does not end with a ``.pdf`` extension
            (compared case-insensitively).
        OSError: If the file cannot be opened or read.
    """
    # Compare case-insensitively so uploads named e.g. "PAPER.PDF" are accepted.
    if not pdf_file_path.lower().endswith(".pdf"):
        raise ValueError("Please upload a PDF file.")
    with open(pdf_file_path, "rb") as file:
        return file.read()
def summarize_and_convert_to_audio(pdf_reader, page):
    """Summarize one page of a PDF and synthesize the summary as speech.

    Args:
        pdf_reader: The raw PDF content as bytes (despite the name, this is
            not a reader object).
        page: 1-based number of the page to summarize; any value accepted by
            ``int()`` (for example the string coming from a text box).

    Returns:
        Path of a ``.wav`` file holding the spoken summary. The file is
        created with ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If ``page`` is not a positive whole number, lies beyond
            the last page of the document, or the selected page contains no
            extractable text.
    """
    # Validate the page number first: without this check, page 0 would turn
    # into index -1 and silently select a page counted from the end.
    try:
        page_number = int(page)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"The page must be a whole number, got {page!r}."
        ) from err
    if page_number < 1:
        raise ValueError("The page number must be 1 or greater.")

    # Open the document straight from memory. Writing the bytes to a temporary
    # file and reopening it by path risks reading an unflushed (truncated)
    # file and leaves the temporary file behind.
    with fitz.open(stream=pdf_reader, filetype="pdf") as pdf_document:
        page_count = len(pdf_document)
        if page_number > page_count:
            raise ValueError(
                f"The document has only {page_count} page(s); "
                f"page {page_number} does not exist."
            )
        abstract_page_text = pdf_document[page_number - 1].get_text()

    if not abstract_page_text.strip():
        raise ValueError("The selected page contains no extractable text.")

    # NOTE: the summarization pipeline is built on every call.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # truncation=True keeps pages longer than the model's input limit from
    # raising inside the model.
    summary = summarizer(
        abstract_page_text, max_length=20, min_length=20, truncation=True
    )
    text = summary[0]['summary_text']

    preload_models()
    audio_array = generate_audio(text)

    # Reserve a named temporary .wav path, then write only after the handle
    # is closed so the file is never open twice at once.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    sf.write(wav_file_path, audio_array, SAMPLE_RATE)
    return wav_file_path
def read_and_speech(pdf_file, abstract_page):
    """Turn the chosen page of an uploaded PDF into a spoken summary.

    Args:
        pdf_file: The uploaded file, either an object exposing the file path
            as ``.name`` or the path itself as a string.
        abstract_page: 1-based number of the page to summarize.

    Returns:
        Whatever ``summarize_and_convert_to_audio`` returns for the file's
        bytes and ``abstract_page``.

    Raises:
        ValueError: If no file was provided.
    """
    # Fail with a clear message instead of an AttributeError on ``None.name``
    # when the form is submitted without an upload.
    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")
    # NOTE(review): depending on the Gradio version/configuration the upload
    # may arrive as a plain path string rather than a file-like object with
    # ``.name`` -- both are accepted here; confirm against the deployed version.
    pdf_file_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    pdf_bytes = readPDF(pdf_file_path)
    return summarize_and_convert_to_audio(pdf_bytes, abstract_page)
# Title and description shown by the Gradio interface.
app_name = "From PDF to Speech"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."

# Web UI: a PDF upload plus the abstract's page number go in, and the audio
# file path returned by read_and_speech comes out.
iface = gr.Interface(
    fn=read_and_speech,
    inputs=[
        # Extension filters need the leading dot (".pdf", not "pdf").
        gr.File(file_types=[".pdf"], label="Upload PDF file"),
        gr.Textbox(label="Insert the page where the abstract is located"),
    ],
    outputs=gr.Audio(type="filepath"),
    title=app_name,
    description=app_description,
    examples=[
        ["Article.pdf", 1],
    ],
    allow_flagging="never",
)

# Start the app as soon as the script runs; share=True requests a public link.
iface.launch(share=True)