Spaces:
Sleeping
Sleeping
# Here are the imports | |
import PyPDF2 | |
import re | |
import torch | |
from transformers import pipeline | |
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub | |
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface | |
import gradio as gr | |
import io | |
import numpy as np | |
import soundfile as sf | |
import tempfile | |
# Here is the code | |
# Function to extract and clean abstract from PDF | |
# Matches the text between an "Abstract" heading and the start of the next
# section.  The section terminator is either an "Introduction"/"Keywords"
# heading (optionally preceded by a standalone section number "1" / "1.") or
# a line that starts with "1"/"1." followed by a capitalised title.  A bare
# "1" is deliberately NOT a terminator: it would cut the abstract at the
# first digit one appearing anywhere in it (e.g. in "10" or "2019").
_ABSTRACT_RE = re.compile(
    r"(?:Abstract|ABSTRACT|abstract)"
    r"(.*?)"
    r"(?:"
    r"(?:\b1\.?\s*)?(?:Introduction|INTRODUCTION|introduction)"
    r"|Keywords|KEYWORDS|keywords"
    r"|^[ \t]*1\.?[ \t]+[A-Z]"
    r")",
    re.DOTALL | re.MULTILINE,
)


def extract_and_clean_abstract(uploaded_file):
    """Extract the abstract from an uploaded PDF and return it as one line.

    Args:
        uploaded_file: Upload object whose ``name`` attribute is the path of
            the PDF on disk, or ``None`` when nothing was uploaded.

    Returns:
        The cleaned abstract text.  The string "No file uploaded." is
        returned when ``uploaded_file`` is ``None`` and "Abstract not found."
        when no abstract section can be located in the extracted text.
    """
    if uploaded_file is None:
        return "No file uploaded."

    # Read the file using its temporary file path.  The pages must be read
    # while the file is still open.
    with open(uploaded_file.name, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # "or ''" guards against pages for which extract_text() yields
        # nothing; join avoids repeated string concatenation.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)

    # Find the abstract
    match = _ABSTRACT_RE.search(full_text)
    if match is None:
        return "Abstract not found."
    abstract = match.group(1).strip()

    # Clean the abstract: re-join words hyphenated across a line break
    # ("infor-\nmation" -> "information") without touching spaced dashes
    # ("A - B"), then collapse newlines and runs of whitespace to one space.
    abstract = re.sub(r"(?<=\w)-\s+(?=\w)", "", abstract)
    return " ".join(abstract.split())
# Function to summarize text | |
# Holds the summarization pipeline once it has been created so that the
# model is loaded a single time per process instead of on every request.
_SUMMARIZER_CACHE = {}


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    if "pipeline" not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE["pipeline"] = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _SUMMARIZER_CACHE["pipeline"]


def summarize_text(text):
    """Summarize ``text`` and return the first sentence of the summary.

    Args:
        text: The text to summarize.

    Returns:
        The part of the generated summary that precedes the first whitespace
        following one of ``. : ; ! ?`` (the whole summary when there is no
        such break).
    """
    summarizer = _get_summarizer()

    # Generate the summary
    result = summarizer(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )

    # Extract the first sentence from the summary
    summary_text = result[0]['summary_text']
    first_sentence = re.split(r'(?<=[.:;!?])\s', summary_text)[0]
    return first_sentence
# Function for text-to-speech | |
# Loaded (task, model, generator) triples keyed by device string, so the TTS
# checkpoint is fetched and initialised once per device rather than on every
# call.
_TTS_CACHE = {}


def _load_tts(device):
    """Return the cached (task, model, generator) for ``device``, loading it on first use."""
    key = str(device)
    if key not in _TTS_CACHE:
        # Load the TTS model and task from Hugging Face Hub
        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
            "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
            arg_overrides={"vocoder": "hifigan", "fp16": False},
        )
        # Ensure the model is on the correct device
        model = models[0].to(device)
        # Update the config with the data config from the task
        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
        # Build the generator
        generator = task.build_generator([model], cfg)
        _TTS_CACHE[key] = (task, model, generator)
    return _TTS_CACHE[key]


def text_to_speech(text):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the audio.
        The file is not deleted automatically (``delete=False``).
    """
    # Check if CUDA is available and set the device accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    task, model, generator = _load_tts(device)

    # Get the model input from the text and move its tensors to the device
    sample = TTSHubInterface.get_model_input(task, text)
    net_input = sample["net_input"]
    net_input["src_tokens"] = net_input["src_tokens"].to(device)
    net_input["src_lengths"] = net_input["src_lengths"].to(device)

    # Generate the waveform
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

    # Reserve a temporary file name, then write after the handle is closed:
    # writing by name while the NamedTemporaryFile is still open is not
    # portable (it cannot be reopened on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name

    # detach().cpu() is a no-op for a CPU tensor without grad and makes the
    # NumPy conversion safe for a CUDA tensor.
    sf.write(wav_path, wav.detach().cpu().numpy(), rate)
    return wav_path
def process_pdf(uploaded_file):
    """
    Process the uploaded PDF file to extract, summarize the abstract, and convert it to speech.

    Args:
        uploaded_file: The uploaded PDF as passed in by the Gradio file input,
            or ``None`` when nothing was uploaded.

    Returns:
        Path of the audio file containing the spoken summary.

    Raises:
        gr.Error: If no file was uploaded or no abstract could be extracted.
    """
    abstract = extract_and_clean_abstract(uploaded_file)

    # extract_and_clean_abstract reports failures as plain strings.  Surface
    # them to the user instead of summarizing and speaking the error message
    # as if it were the abstract.
    if not abstract:
        raise gr.Error("Abstract not found.")
    if abstract in ("No file uploaded.", "Abstract not found."):
        raise gr.Error(abstract)

    summary = summarize_text(abstract)
    audio_output = text_to_speech(summary)
    return audio_output
# Create Gradio interface | |
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Audio(label="Audio Summary"),
    title="PDF Abstract Summary to Speech",
    # NOTE: the comma after this argument was missing, which made the whole
    # call a SyntaxError.
    description="Upload only a PDF file that has an abstract. The model will extract its abstract, summarize it, and converts the summary to speech.",
    # Example PDFs offered in the UI; each inner list is one set of inputs.
    examples=[
        ["Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"],
        ["Article 7 Efficient Estimation of Word Representations in Vector Space.pdf"],
        ["Article 6 BloombergGPT_ A Large Language Model for Finance.pdf"],
    ],
)
# Run the Gradio app
iface.launch()