clone / app.py
nikkmitra's picture
Update app.py
7e4b5db verified
raw
history blame
3.53 kB
import gradio as gr
import torch
from TTS.api import TTS
import os
import spaces
import tempfile
# Set the Coqui terms-of-service flag in the environment before the TTS model
# is constructed further down.
os.environ["COQUI_TOS_AGREED"] = "1"

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Initialize TTS model
def load_tts_model():
    """Build the XTTS v2 model and move it onto the selected device.

    Returns:
        The result of calling ``.to(device)`` on the ``TTS`` instance created
        for ``tts_models/multilingual/multi-dataset/xtts_v2``.
    """
    model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    model = TTS(model_name)
    return model.to(device)


# Created once at import time; both request handlers use this instance.
tts = load_tts_model()
# Celebrity voices (example list, you may want to expand or modify this).
# Maps the name shown in the voice dropdown to the reference recording that
# is passed to the model as `speaker_wav`.
# NOTE(review): the "path/to/..." entries look like placeholders — confirm the
# files exist before offering these voices.
celebrity_voices = {
    "morgan": "./voices/morgan.mp3",
    "Scarlett Johansson": "path/to/scarlett_johansson_sample.wav",
    "David Attenborough": "path/to/david_attenborough_sample.wav",
}
@spaces.GPU(duration=120)
def tts_generate(text, voice, language):
    """Synthesize ``text`` using one of the preset celebrity voices.

    Args:
        text: Text to speak.
        voice: Key into ``celebrity_voices`` selecting the reference recording.
        language: Language code forwarded to the TTS model (e.g. ``"en"``).

    Returns:
        Path of a newly created ``.wav`` file holding the generated audio.

    Raises:
        gr.Error: If ``text`` is blank, ``voice`` is not a known preset, or the
            preset's reference recording does not exist on disk.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    # `voice` is None when nothing was picked in the dropdown; report that to
    # the user instead of failing with a raw KeyError.
    speaker_wav = celebrity_voices.get(voice)
    if speaker_wav is None:
        raise gr.Error("Please select a voice.")
    if not os.path.isfile(speaker_wav):
        raise gr.Error(f"Reference audio for '{voice}' was not found.")

    # Reserve a unique output path. delete=False keeps the file on disk after
    # the handle is closed so that its path can be returned to the UI.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Do not leave the placeholder file behind when synthesis fails.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path
@spaces.GPU(enable_queue=True)
def clone_voice(text, audio_file, language):
    """Synthesize ``text`` in the voice of a user-supplied reference recording.

    Args:
        text: Text to speak.
        audio_file: Path of the reference audio, passed to the model as
            ``speaker_wav``.
        language: Language code forwarded to the TTS model (e.g. ``"en"``).

    Returns:
        Path of a newly created ``.wav`` file holding the generated audio.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was supplied.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    # No upload/recording yields a falsy value; fail early with a clear
    # message instead of an opaque error from inside the model.
    if not audio_file:
        raise gr.Error("Please provide a voice reference audio file.")

    # Reserve a unique output path. delete=False keeps the file on disk after
    # the handle is closed so that its path can be returned to the UI.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Do not leave the placeholder file behind when synthesis fails.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path
# Placeholder function for Talking Image tab
def talking_image_placeholder():
    """Return the fixed notice that the Talking Image feature is unavailable."""
    notice = "Talking Image functionality not implemented yet."
    return notice
# Define Gradio interface: one page with three tabs (preset-voice TTS, a
# "Talking Image" stub, and voice cloning from a reference recording).
# NOTE(review): confirm which widgets are meant to sit inside each gr.Row;
# here the three inputs share a row and the button/output follow below it.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")
    with gr.Tabs():
        # Tab 1: speak the text in one of the voices from `celebrity_voices`.
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                # No default value is set, so the handler may receive None.
                tts_voice = gr.Dropdown(choices=list(celebrity_voices.keys()), label="Celebrity Voice")
                tts_language = gr.Dropdown(["en", "es", "fr", "de", "it","ar"], label="Language", value="en")
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")
            # tts_generate returns a file path, which is shown in tts_output.
            tts_generate_btn.click(
                tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output
            )
        # Tab 2: static notice only; no handler is attached.
        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")
        # Tab 3: speak the text in the voice of an uploaded/recorded sample.
        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                # type="filepath" hands the handler a path rather than raw audio data.
                clone_audio = gr.Audio(label="Voice reference audio file", type="filepath")
                clone_language = gr.Dropdown(["en", "es", "fr", "de", "it","ar"], label="Language", value="en")
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")
            # clone_voice returns a file path, which is shown in clone_output.
            clone_generate_btn.click(
                clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output
            )
# JavaScript snippet that adds `__theme=dark` to the page URL and reloads the
# page whenever that query parameter is not already set to 'dark'.
# NOTE(review): js_func is not passed to anything in the visible code, so it
# currently has no effect.
js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'dark') {
url.searchParams.set('__theme', 'dark');
window.location.href = url.href;
}
}
"""
# Launch the interface
# with gr.Blocks(js=js_func) as demo:
demo.launch()

# Clean up temporary files (this will run after the Gradio server is closed).
# The handlers create their output with tempfile.NamedTemporaryFile, which
# writes into the system temp directory rather than the current working
# directory, so that is the directory that has to be scanned.
temp_dir = tempfile.gettempdir()
for file in os.listdir(temp_dir):
    if file.endswith('.wav') and file.startswith('tmp'):
        try:
            os.remove(os.path.join(temp_dir, file))
        except OSError:
            # Best-effort cleanup: the file may already be gone or may belong
            # to another process; neither should abort shutdown.
            continue