Xuratron's picture
Update app.py
2a65e96
raw
history blame
4.3 kB
# Here are the imports
import PyPDF2
import re
import torch
from transformers import pipeline
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import gradio as gr
import io
import numpy as np
import soundfile as sf
import tempfile
# Here is the code
# Function to extract and clean abstract from PDF
def extract_and_clean_abstract(uploaded_file):
    """Extract the abstract from an uploaded PDF and flatten it to one line.

    Args:
        uploaded_file: The upload handed over by the UI. Either an object
            whose ``name`` attribute is the path of the file on disk, or the
            path itself. ``None`` means nothing was uploaded.

    Returns:
        The cleaned abstract text, or one of the status messages
        ``"No file uploaded."`` / ``"Abstract not found."``.
    """
    if uploaded_file is None:
        return "No file uploaded."

    # Accept both a tempfile-like wrapper (has .name) and a plain path.
    pdf_path = getattr(uploaded_file, "name", uploaded_file)

    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Guard with `or ""` in case a page yields no text at all.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)

    # The abstract runs from the "Abstract" heading up to the next heading.
    # The section number is only treated as a terminator when it directly
    # precedes "Introduction"; a bare "1" alternative would cut the abstract
    # at the first digit 1 anywhere in the text (e.g. "2019", "top-1").
    pattern = (
        r"(Abstract|ABSTRACT|abstract)"
        r"(.*?)"
        r"((?:1\.?\s*)?(?:Introduction|INTRODUCTION|introduction)"
        r"|Keywords|KEYWORDS|keywords)"
    )
    match = re.search(pattern, full_text, re.DOTALL)
    if not match:
        return "Abstract not found."

    abstract = match.group(2).strip()
    # Join lines with spaces, then drop "- " so words hyphenated across a
    # line break ("exam-\nple") are stitched back together.
    return abstract.replace('\n', ' ').replace('- ', '')
# Function to summarize text
_SUMMARIZER = None  # summarization pipeline, built on first use and then reused


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _SUMMARIZER


def summarize_text(text):
    """Summarize *text* and return only the first sentence of the summary.

    The pipeline is created once and cached, so repeated calls do not
    re-initialize the model.

    Args:
        text: The text to summarize.

    Returns:
        The summary up to (and including) its first sentence-ending
        punctuation mark (one of ``. : ; ! ?``) that is followed by
        whitespace, or the whole summary if there is no such mark.
    """
    summarizer = _get_summarizer()
    result = summarizer(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )
    summary = result[0]['summary_text']
    # Split right after the first terminator that is followed by whitespace.
    return re.split(r'(?<=[.:;!?])\s', summary)[0]
# Function for text-to-speech
_TTS_STATE = None  # (task, model, generator, device), loaded on first use


def _load_tts():
    """Return the shared TTS task/model/generator/device, loading them once."""
    global _TTS_STATE
    if _TTS_STATE is None:
        # Use CUDA when available, otherwise stay on the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
            "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
            arg_overrides={"vocoder": "hifigan", "fp16": False},
        )
        model = models[0].to(device)
        # The generator is built from cfg, so merge the task's data config first.
        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
        generator = task.build_generator([model], cfg)
        _TTS_STATE = (task, model, generator, device)
    return _TTS_STATE


def text_to_speech(text):
    """Synthesize *text* to speech and return the path of a ``.wav`` file.

    The model and generator are loaded on the first call and reused
    afterwards.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is not
        deleted automatically (``delete=False``).
    """
    task, model, generator, device = _load_tts()

    # Build the model input and move its tensors next to the model.
    sample = TTSHubInterface.get_model_input(task, text)
    net_input = sample["net_input"]
    net_input["src_tokens"] = net_input["src_tokens"].to(device)
    net_input["src_lengths"] = net_input["src_lengths"].to(device)

    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
    # .cpu() is a no-op for CPU tensors; detach() keeps .numpy() valid even
    # if the tensor happens to carry autograd state.
    samples = wav.detach().cpu().numpy()

    # Reserve a unique path, then write after the handle is closed so the
    # writer is not blocked on platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name
    sf.write(wav_path, samples, rate)
    return wav_path
def process_pdf(uploaded_file):
    """Turn an uploaded PDF into a spoken one-sentence summary of its abstract.

    Args:
        uploaded_file: The upload as passed to ``extract_and_clean_abstract``.

    Returns:
        Path of the generated audio file. When no file was uploaded or no
        abstract could be located, the audio speaks that status message
        instead of a summary.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    # Status messages are not abstracts: speak them as they are rather than
    # pushing them through the summarizer.
    if abstract in ("No file uploaded.", "Abstract not found."):
        return text_to_speech(abstract)
    summary = summarize_text(abstract)
    return text_to_speech(summary)
# Create Gradio interface
# Wire the PDF -> audio pipeline into a simple upload/playback UI.
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Audio(label="Audio Summary"),
    title="PDF Abstract Summary to Speech",
    description="Upload only a PDF file that has an abstract. The model will extract its abstract, summarize it, and converts the summary to speech.",
    # Example inputs are referenced by relative file name.
    examples=[
        ["Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"],
        ["Article 7 Efficient Estimation of Word Representations in Vector Space.pdf"],
        ["Article 6 BloombergGPT_ A Large Language Model for Finance.pdf"],
    ],
)
# Run the Gradio app
iface.launch()