Spaces:
Sleeping
Sleeping
import os | |
import torch | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
import PyPDF2 | |
import sounddevice as sd | |
import numpy as np | |
from gtts import gTTS | |
from io import BytesIO | |
import gradio as gr | |
def load_quantized_model(model_name):
    """Load a seq2seq model plus tokenizer and dynamically quantize the model.

    Args:
        model_name: Identifier handed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` is the result of
        ``quantize_dynamic`` switched to evaluation mode.
    """
    float_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Only ``nn.Linear`` layers are targeted; their weights become int8.
    quantized_model = torch.quantization.quantize_dynamic(
        float_model,
        {torch.nn.Linear},
        dtype=torch.qint8,
    )
    quantized_model.eval()
    return quantized_model, tokenizer
def pdf_to_text(pdf_bytes):
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_bytes: Raw bytes of a PDF document.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. An empty string if the document has no pages.
    """
    # ``PdfReader`` / ``.pages`` / ``extract_text()`` are the current names for
    # ``PdfFileReader`` / ``numPages`` + ``getPage()`` / ``extractText()``,
    # which PyPDF2 3.x no longer provides.
    # The ``with`` block closes the stream even when parsing raises.
    with BytesIO(pdf_bytes) as pdf_stream:
        reader = PyPDF2.PdfReader(pdf_stream)
        # Pages are read lazily from the stream, so join while it is open.
        # ``or ""`` keeps the join safe if a page yields no text.
        return "".join(page.extract_text() or "" for page in reader.pages)
def generate_audio(model, tokenizer, text):
    """Run *text* through *model* and return the decoded output text.

    Despite the name, this returns a string, not audio: the token ids
    produced by ``model.generate`` are decoded back to text.

    Args:
        model: Object exposing ``generate(input_ids, max_length=...,
            pad_token_id=...)``.
        tokenizer: Object exposing ``encode``, ``decode`` and
            ``eos_token_id``.
        text: Input text to encode.

    Returns:
        The decoded first generated sequence (special tokens are not
        stripped).
    """
    # ``encode(..., return_tensors="pt")`` already yields a tensor, so
    # ``as_tensor`` avoids a redundant copy while still accepting a list.
    encoded = torch.as_tensor(tokenizer.encode(text, return_tensors="pt"))
    # Follow the model's device instead of forcing CUDA, which fails on
    # CPU-only hosts and whenever the model itself stays on the CPU.
    input_ids = encoded.to(_model_device(model))
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_length=500,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0])


def _model_device(model):
    """Return the device *model* reports, falling back to the CPU."""
    device = getattr(model, "device", None)
    if device is not None:
        return device
    try:
        return next(model.parameters()).device
    except (AttributeError, StopIteration):
        # No ``parameters()`` method, or no parameters at all.
        return torch.device("cpu")
def save_and_play_audio(text):
    """Synthesize *text* to English speech with gTTS and save it as an MP3.

    Args:
        text: The text to speak.

    Returns:
        The path of the written file, ``"output.mp3"`` (relative to the
        current working directory; overwritten on every call).
    """
    output_file = "output.mp3"
    gTTS(text=text, lang='en').save(output_file)
    # NOTE: the previous local-playback step called
    # ``sd.default.read_audio``, which sounddevice does not provide, so the
    # function always raised AttributeError after saving. sounddevice plays
    # sample arrays and cannot decode MP3, and no package imported by this
    # file can either. Playback is therefore left to whoever consumes the
    # returned path.
    return output_file
def main(pdf_file):
    """Turn an uploaded PDF into a spoken-audio file.

    Pipeline: load the quantized model, extract the PDF text, run it through
    the model, then synthesize the result to an MP3.

    Args:
        pdf_file: The uploaded PDF, given as a filesystem path, raw bytes, or
            a file-like object (optionally exposing its path as ``.name``).

    Returns:
        ``{"output_file": <path of the generated audio file>}``.
    """
    # NOTE(review): the checkpoint name suggests a SpeechT5 text-to-speech
    # model, while the loader uses a seq2seq-LM class; confirm they match.
    model, tokenizer = load_quantized_model("microsoft/speecht5_tts")
    # The model is deliberately NOT moved to the GPU: it is dynamically
    # quantized, and quantized linear kernels run on the CPU only, so a CUDA
    # move makes generation fail.

    text = pdf_to_text(_read_upload(pdf_file))
    audio_text = generate_audio(model, tokenizer, text)
    output_file = save_and_play_audio(audio_text)
    return {"output_file": output_file}


def _read_upload(upload):
    """Return the raw bytes of *upload* (path, bytes, or file-like object)."""
    if isinstance(upload, (bytes, bytearray)):
        return bytes(upload)
    if isinstance(upload, (str, os.PathLike)):
        path = upload
    else:
        path = getattr(upload, "name", None)
    # Prefer re-reading from disk: a temp-file handle may be positioned at
    # the end of the data, in which case ``.read()`` would return nothing.
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        with open(path, "rb") as handle:
            return handle.read()
    return upload.read()
if __name__ == "__main__":
    # "pdf" is not a value the File input's ``type`` accepts, so an upload
    # could not be processed; "file" hands ``main`` a file object.
    app = gr.Interface(main, inputs=gr.inputs.File(type="file"), outputs="text")
    app.launch()