File size: 2,402 Bytes
c6a6aba
 
707243e
c6a6aba
 
 
 
 
707243e
 
 
 
 
 
 
 
c6a6aba
707243e
 
 
 
 
 
 
5450db8
 
707243e
 
 
 
 
 
 
 
 
 
 
c6a6aba
707243e
c6a6aba
 
 
 
 
 
 
 
 
707243e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a6aba
 
707243e
d8786cb
707243e
 
 
 
 
 
735cb49
d8786cb
707243e
 
c6a6aba
ca189d8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import subprocess
import sys

# Install the runtime dependencies before the third-party imports below run.
# pip is invoked as "<this interpreter> -m pip" so the packages land in the
# environment that is executing this script; a bare "pip" resolved from PATH
# may belong to a different Python installation.
# The exit status is not checked, so a failed install only surfaces if one of
# the imports below fails.
# NOTE(review): confirm that the PyPI distribution named "bark" is the package
# that provides SAMPLE_RATE / generate_audio / preload_models used below.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "PyPDF2", "transformers", "bark", "gradio", "soundfile", "PyMuPDF", "numpy",
    ]
)

import PyPDF2
from transformers import pipeline
from bark import SAMPLE_RATE, generate_audio, preload_models
import gradio as gr
import os
import io 
import fitz
import tempfile
from PyPDF2 import PdfReader
import numpy as np
from tempfile import NamedTemporaryFile
import soundfile as sf

def readPDF(pdf_file_path: str) -> bytes:
    """Return the raw bytes of the PDF file at ``pdf_file_path``.

    Args:
        pdf_file_path: Path to the file, as a string. The extension check is
            case-insensitive, so ``report.PDF`` is accepted as well as
            ``report.pdf``.

    Returns:
        The complete file content as ``bytes``.

    Raises:
        ValueError: If the path does not end with a ``.pdf`` extension.
        OSError: If the file cannot be opened or read.
    """
    # Only the extension is checked here, not the file content.
    if not pdf_file_path.lower().endswith(".pdf"):
        raise ValueError("Please upload a PDF file.")

    with open(pdf_file_path, "rb") as file:
        return file.read()


def summarize_and_convert_to_audio(pdf_reader, page):
    """Summarize one page of a PDF and synthesize the summary as speech.

    Args:
        pdf_reader: Raw bytes of the PDF document (despite the name, this is
            the file content, not a reader object).
        page: 1-based number of the page to summarize; any value accepted by
            ``int()`` (for example ``"3"`` or ``3``).

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If ``page`` cannot be converted to an integer, lies outside
            the document's page range, or the page has no extractable text.
    """
    print(page)
    page_number = int(page)

    # Parse the document directly from memory. Writing the bytes to a temporary
    # file and reopening it by path risks reading unflushed (truncated) data
    # and leaves a stray file behind.
    pdf_document = fitz.open(stream=pdf_reader, filetype="pdf")
    try:
        page_count = pdf_document.page_count
        # Reject 0 and negative numbers explicitly: a negative index would
        # otherwise silently select a page counted from the end.
        if not 1 <= page_number <= page_count:
            raise ValueError(
                f"Page {page_number} is out of range; "
                f"the document has {page_count} page(s)."
            )
        abstract_page_text = pdf_document[page_number - 1].get_text()
    finally:
        pdf_document.close()

    if not abstract_page_text.strip():
        raise ValueError(
            f"Page {page_number} contains no extractable text to summarize."
        )

    # min_length equals max_length, which pins the requested summary length.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(abstract_page_text, max_length=20, min_length=20)

    preload_models()

    text = summary[0]['summary_text']
    audio_array = generate_audio(text)

    # Reserve a unique .wav path, then write the audio once the handle is
    # closed so the file is not opened twice at the same time.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    sf.write(wav_file_path, audio_array, SAMPLE_RATE)
    return wav_file_path


def read_and_speech(pdf_file, abstract_page):
    """Read an uploaded PDF and return spoken audio summarizing one page.

    Args:
        pdf_file: The uploaded file, either as a path string or as an object
            exposing the path through a ``name`` attribute.
        abstract_page: 1-based number of the page to summarize.

    Returns:
        Whatever ``summarize_and_convert_to_audio`` returns for the file's
        bytes and ``abstract_page`` (a path to the generated audio file).

    Raises:
        ValueError: If no file was provided, or the file is not a ``.pdf``.
    """
    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")

    print(pdf_file)
    # Accept both shapes an upload can arrive in: a plain path string, or a
    # file wrapper whose ``name`` attribute holds the path.
    pdf_file_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    print(pdf_file_path)

    pdf_bytes = readPDF(pdf_file_path)
    return summarize_and_convert_to_audio(pdf_bytes, abstract_page)
    
# Title and description shown at the top of the Gradio page.
app_name = "From PDF to Speech"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."


# Web UI: a PDF upload plus a page-number box in, an audio player out.
iface = gr.Interface(
    fn=read_and_speech,
    inputs=[
        # Extension filters need the leading dot (".pdf"); a bare "pdf" is not
        # one of the named file categories and does not match an extension.
        gr.File(file_types=[".pdf"], label="Upload PDF file"),
        gr.Textbox(label="Insert the page where the abstract is located"),
    ],
    # read_and_speech returns a path to a .wav file, hence type="filepath".
    outputs=gr.Audio(type="filepath"),
    title=app_name,
    description=app_description,
    # NOTE(review): the example assumes "Article.pdf" exists next to this
    # script - confirm it ships with the app.
    examples=[
        ["Article.pdf", 1],
    ],
    allow_flagging="never",
)

# share=True additionally requests a public share link for the app.
iface.launch(share=True)