# Hugging Face Space — status when captured: Runtime error
import time
from datetime import datetime
from io import StringIO
from pathlib import Path

import numpy as np
import soundfile as sf
import streamlit as st
import torch
from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan, SpeechT5ForTextToSpeech
# Improved Styling
def local_css(file_name):
    """Inject the contents of a CSS file into the Streamlit page.

    Args:
        file_name: Path of the stylesheet to read.

    Raises:
        OSError: If the file cannot be opened or read (e.g. it is missing).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding; read_text also closes the file.
    css = Path(file_name).read_text(encoding="utf-8")
    st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
# Apply the stylesheet that is expected to sit next to this script.
local_css("style.css")

# Page header: title followed by a one-line description.
st.title("Text-to-Voice Conversion")
st.markdown("Convert your text to speech using advanced AI models.")
# Streamlit re-executes this script from the top on every widget
# interaction, so the loader is cached: the checkpoints are loaded once and
# the same objects are handed back on later reruns.
@st.cache_resource
def load_models():
    """Load the SpeechT5 text-to-speech model, its processor and the vocoder.

    Returns:
        tuple: ``(model, processor, vocoder)`` created with ``from_pretrained``
        from ``microsoft/speecht5_tts`` (model and processor) and
        ``microsoft/speecht5_hifigan`` (vocoder).
    """
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return model, processor, vocoder


model, processor, vocoder = load_models()
# Load speaker embeddings
def get_speaker_embeddings(embeddings_path="cmu_us_slt_arctic-wav-arctic_a0508.npy"):
    """Load a speaker embedding from a ``.npy`` file as a batched tensor.

    Args:
        embeddings_path: Path of the NumPy file to load. Defaults to the
            embedding file this script expects in its working directory.

    Returns:
        torch.Tensor: The loaded array with a new leading dimension of
        size 1 added by ``unsqueeze(0)``.

    Raises:
        OSError: If the file cannot be opened or read (e.g. it is missing).
    """
    speaker_embeddings = np.load(embeddings_path)
    return torch.tensor(speaker_embeddings).unsqueeze(0)
# Speaker embedding shared by every synthesis call in this script.
speaker_embeddings = get_speaker_embeddings()

# Free-form text entry; `text` holds the current contents of the box.
text = st.text_area("Type your text or upload a text file below.")
# Synthesis helper
def text_to_speech(text):
    """Synthesize *text* to speech and save the audio as ``speech.wav``.

    Uses the module-level ``processor``, ``model``, ``vocoder`` and
    ``speaker_embeddings``. The file is overwritten on every call.

    Args:
        text: The text to convert.

    Returns:
        str: The path of the written file, always ``"speech.wav"``.
    """
    output_path = "speech.wav"
    encoded = processor(text=text, return_tensors="pt")
    token_ids = encoded["input_ids"]
    mel = model.generate_speech(token_ids, speaker_embeddings)
    # The vocoder pass is inference-only, so autograd tracking is disabled.
    with torch.no_grad():
        waveform = vocoder(mel)
    samples = waveform.numpy()
    sf.write(output_path, samples, samplerate=16000)
    return output_path
# Convert Button: synthesize the contents of the text box and play it back.
if st.button("Convert"):
    if text:
        audio_path = text_to_speech(text)
        # Read inside a context manager so the file handle is always closed.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/wav")
    else:
        st.error("Please enter some text to convert.")
# File Uploader: synthesize the contents of an uploaded .txt file.
uploaded_file = st.file_uploader("Upload your text file here", type=['txt'])
if uploaded_file is not None:
    try:
        text = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # Report undecodable uploads to the user instead of crashing the run.
        st.error("The uploaded file could not be decoded as UTF-8 text.")
    else:
        if text.strip():
            audio_path = text_to_speech(text)
            # Read inside a context manager so the file handle is always closed.
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
            st.audio(audio_bytes, format="audio/wav")
        else:
            # Mirror the Convert button: never synthesize from empty input.
            st.error("The uploaded file is empty.")