Spaces:
Sleeping
Sleeping
File size: 4,298 Bytes
6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 6616d67 1adb2ef 47bffbc 2a65e96 f7c9274 2a65e96 6616d67 1adb2ef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# Here are the imports
import PyPDF2
import re
import torch
from transformers import pipeline
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import gradio as gr
import io
import numpy as np
import soundfile as sf
import tempfile
# Here is the code
# Function to extract and clean abstract from PDF
def extract_and_clean_abstract(uploaded_file):
    """Extract the abstract of an uploaded PDF and return it as a single line.

    The text of every page is concatenated, and the span between the first
    "Abstract" heading and the start of the next section (an "Introduction"
    or "Keywords" heading, or a line that starts with the section number 1)
    is taken as the abstract.

    Args:
        uploaded_file: The upload received from the Gradio ``File`` input.
            Either a path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute holds the path of the temporary file.
            ``None`` means nothing was uploaded.

    Returns:
        The cleaned abstract, or one of the sentinel messages
        ``"No file uploaded."`` / ``"Abstract not found."``.
    """
    import os  # local import: only needed to normalise path-like uploads

    if uploaded_file is None:
        return "No file uploaded."

    # Some Gradio versions pass a plain file path, others a temp-file wrapper
    # exposing the path as ``.name``; accept both.
    if isinstance(uploaded_file, (str, os.PathLike)):
        pdf_path = os.fspath(uploaded_file)
    else:
        pdf_path = uploaded_file.name

    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may yield None for pages without a text layer, so
        # fall back to "" instead of failing on the concatenation.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)

    # The end marker used to contain a bare "1", which truncated the abstract
    # at the first digit 1 anywhere in it (e.g. in "2017" or "10%"). The
    # section number is now only honoured at the start of a line in front of
    # a capitalised heading, or directly in front of "Introduction".
    pattern = (
        r"(?:Abstract|ABSTRACT|abstract)"
        r"(.*?)"
        r"(?:"
        r"^[ \t]*1\.?[ \t]+(?=[A-Z])"
        r"|(?:\b1\.?\s*)?(?:Introduction|INTRODUCTION|introduction)"
        r"|Keywords|KEYWORDS|keywords"
        r")"
    )
    match = re.search(pattern, full_text, re.DOTALL | re.MULTILINE)
    if match is None:
        return "Abstract not found."

    abstract = match.group(1).strip()
    # Join the lines and re-attach words that were hyphenated at a line break
    # ("exam-\nple" -> "exam- ple" -> "example").
    return abstract.replace('\n', ' ').replace('- ', '')
# Function to summarize text
def summarize_text(text):
    """Summarize *text* and return only the first sentence of the summary.

    Args:
        text: The text to summarize (here: a cleaned paper abstract).

    Returns:
        The summary up to, and including, its first sentence-ending
        punctuation mark (one of ``. : ; ! ?``) that is followed by whitespace.
    """
    summarizer = _get_summarizer()
    result = summarizer(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )
    summary = result[0]['summary_text']
    # Split after the first sentence terminator and keep the leading part.
    return re.split(r'(?<=[.:;!?])\s', summary)[0]


# Lazily created summarization pipeline, shared by all requests.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use.

    Building the pipeline loads the model, so doing it once instead of on
    every request avoids repeating that cost; the model stays in memory for
    the lifetime of the process.
    """
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _SUMMARIZER
# Function for text-to-speech
def text_to_speech(text):
    """Synthesize *text* to speech and return the path of the resulting WAV file.

    Args:
        text: The text to be spoken.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is not
        deleted by this function; it has to outlive the call so the caller
        can read or serve it.
    """
    task, model, generator, device = _get_tts_components()

    # Build the model input and move its tensors to the model's device.
    sample = TTSHubInterface.get_model_input(task, text)
    net_input = sample["net_input"]
    net_input["src_tokens"] = net_input["src_tokens"].to(device)
    net_input["src_lengths"] = net_input["src_lengths"].to(device)

    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

    # Reserve a file name and close the handle before writing: writing by
    # path while the handle is still open fails on platforms that lock open
    # files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name
    # detach()/cpu() are no-ops for a CPU tensor without gradients and make
    # the NumPy conversion safe otherwise.
    sf.write(wav_path, wav.detach().cpu().numpy(), rate)
    return wav_path


# Lazily created (task, model, generator, device) tuple, shared by all requests.
_TTS_COMPONENTS = None


def _get_tts_components():
    """Return the shared TTS task, model, generator and device.

    Loading the model ensemble from the Hugging Face Hub is expensive, so it
    is done on first use only and the result is kept for later calls.
    """
    global _TTS_COMPONENTS
    if _TTS_COMPONENTS is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
            "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
            arg_overrides={"vocoder": "hifigan", "fp16": False}
        )
        model = models[0].to(device)
        # The generator is built from the config, which first has to be
        # updated with the task's data config.
        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
        generator = task.build_generator([model], cfg)
        _TTS_COMPONENTS = (task, model, generator, device)
    return _TTS_COMPONENTS
def process_pdf(uploaded_file):
    """Turn an uploaded PDF into a spoken one-sentence summary of its abstract.

    The abstract is extracted from the PDF, summarized, and the summary is
    converted to speech.

    Args:
        uploaded_file: The value received from the Gradio ``File`` input.

    Returns:
        Path of the audio file containing the spoken summary.

    Raises:
        gr.Error: If no file was uploaded or no abstract could be extracted.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    # extract_and_clean_abstract reports failures as plain messages. Show
    # them to the user instead of summarizing and speaking them as if they
    # were an abstract.
    if abstract in ("No file uploaded.", "Abstract not found."):
        raise gr.Error(abstract)
    if not abstract.strip():
        raise gr.Error("Abstract not found.")

    summary = summarize_text(abstract)
    return text_to_speech(summary)
# Create Gradio interface
iface = gr.Interface(
    title="PDF Abstract Summary to Speech",
    description=(
        "Upload only a PDF file that has an abstract. "
        "The model will extract its abstract, summarize it, "
        "and converts the summary to speech."
    ),
    # One PDF in, one audio clip out, produced by process_pdf.
    fn=process_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Audio(label="Audio Summary"),
    # Sample papers offered as ready-made examples in the UI.
    examples=[
        ["Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"],
        ["Article 7 Efficient Estimation of Word Representations in Vector Space.pdf"],
        ["Article 6 BloombergGPT_ A Large Language Model for Finance.pdf"],
    ],
)
# Start the web app
iface.launch()
|