Spaces:

Nag189
/

Text-to-Voice

Runtime error

App Files Files Community

Text-to-Voice / app.py

Nag189

Update app.py

587b007 about 1 year ago

raw

history blame

2.31 kB

	import streamlit as st
	import time
	from datetime import datetime
	from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan, SpeechT5ForTextToSpeech
	import numpy as np
	import torch
	from io import StringIO
	import soundfile as sf

	# Improved Styling
	def local_css(file_name):
	    """Inject the contents of a CSS file into the Streamlit page.

	    The whole file is read and emitted inside a ``<style>`` element through
	    ``st.markdown`` with ``unsafe_allow_html=True``.

	    Args:
	        file_name: Path of the CSS file to read.

	    Raises:
	        OSError: If the file cannot be opened (for example, it is missing).
	    """
	    # Decode explicitly as UTF-8 so the result does not depend on the
	    # host's locale default encoding.
	    with open(file_name, encoding="utf-8") as f:
	        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)

	# Apply the custom stylesheet; this assumes a 'style.css' file is
	# reachable at that relative path.
	local_css(file_name="style.css")

	# Page header: a title followed by a one-line description of the app.
	st.title(body="Text-to-Voice Conversion")
	st.markdown(body="Convert your text to speech using advanced AI models.")

	# Load models outside of function calls for efficiency
	@st.cache_resource
	def load_models():
	    """Load the SpeechT5 text-to-speech model, its processor and the vocoder.

	    Cached with ``st.cache_resource`` rather than ``st.cache_data``: the
	    returned objects are model instances, and ``cache_data`` would
	    serialise them and hand back a fresh copy on every script rerun.
	    ``cache_resource`` keeps one shared instance instead.

	    Returns:
	        tuple: ``(model, processor, vocoder)`` loaded from the
	        ``microsoft/speecht5_tts`` and ``microsoft/speecht5_hifigan``
	        checkpoints.
	    """
	    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
	    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
	    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
	    return model, processor, vocoder


	# Module-level handles used by text_to_speech().
	model, processor, vocoder = load_models()

	# Load speaker embeddings
	@st.cache_data
	def get_speaker_embeddings():
	    """Load the speaker embedding used to condition speech generation.

	    Reads ``cmu_us_slt_arctic-wav-arctic_a0508.npy`` (a relative path) with
	    NumPy, converts the array to a torch tensor and adds a leading
	    dimension of size 1.

	    Returns:
	        torch.Tensor: The embedding with an extra leading axis
	        (presumably the batch axis expected by the model -- confirm).
	    """
	    speaker_embeddings = np.load("cmu_us_slt_arctic-wav-arctic_a0508.npy")
	    return torch.tensor(speaker_embeddings).unsqueeze(0)


	# Module-level embedding used by text_to_speech().
	speaker_embeddings = get_speaker_embeddings()

	# Text Input
	# Free-form text entry; the value is consumed by the Convert button below.
	text = st.text_area(label="Type your text or upload a text file below.")

	# Function to convert text to speech
	def text_to_speech(text):
	    """Synthesise *text* to speech and save it as ``speech.wav``.

	    The text is tokenised with the module-level ``processor``, turned into
	    a spectrogram by ``model`` conditioned on ``speaker_embeddings``, and
	    converted to a waveform by ``vocoder``. The waveform is written with a
	    sample rate of 16000. Every call overwrites the same output file.

	    Args:
	        text: The text to speak.

	    Returns:
	        str: The path of the written file, always ``"speech.wav"``.
	    """
	    output_path = "speech.wav"
	    encoded = processor(text=text, return_tensors="pt")
	    mel = model.generate_speech(encoded["input_ids"], speaker_embeddings)
	    # Inference only: no gradient tracking is needed while vocoding.
	    with torch.no_grad():
	        waveform = vocoder(mel)
	    sf.write(output_path, waveform.numpy(), samplerate=16000)
	    return output_path

	# Convert Button
	# Convert the typed text when the button is pressed.
	if st.button("Convert"):
	    if text:
	        audio_path = text_to_speech(text)
	        # Read through a context manager so the file handle is closed
	        # instead of being leaked on every click.
	        with open(audio_path, 'rb') as audio_file:
	            audio_bytes = audio_file.read()
	        st.audio(audio_bytes, format='audio/wav')
	    else:
	        st.error("Please enter some text to convert.")

	# File Uploader
	# Alternative input: synthesise the contents of an uploaded .txt file.
	uploaded_file = st.file_uploader("Upload your text file here", type=['txt'])
	if uploaded_file is not None:
	    try:
	        text = uploaded_file.getvalue().decode("utf-8")
	    except UnicodeDecodeError:
	        # A '.txt' extension does not guarantee UTF-8 content; report it
	        # instead of letting the script fail with a traceback.
	        st.error("The uploaded file is not valid UTF-8 text.")
	    else:
	        if text.strip():
	            audio_path = text_to_speech(text)
	            # Read through a context manager so the file handle is closed
	            # instead of being leaked on every rerun.
	            with open(audio_path, 'rb') as audio_file:
	                audio_bytes = audio_file.read()
	            st.audio(audio_bytes, format='audio/wav')
	        else:
	            # Mirror the Convert button: report empty input rather than
	            # sending it to the model.
	            st.error("The uploaded file contains no text to convert.")