Spaces:

Abhaykoul
/

Voice-clone

Running on Zero

File size: 2,576 Bytes

53bc3ba
2a72edc
 
 
01dfdfb
2a72edc
53bc3ba
 
2a72edc
db87855
53bc3ba
2a72edc
db87855
 
 
 
01dfdfb
2a72edc
53bc3ba
 
 
 
db87855
53bc3ba
 
 
 
 
db87855
53bc3ba
 
 
 
 
01dfdfb
53bc3ba
 
 
01dfdfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53bc3ba

import os
import tempfile

import gradio as gr
import spaces  # Ensure this is the correct module for your environment
import torch
from TTS.api import TTS

# Record agreement to the Coqui terms of service through the environment.
os.environ.update(COQUI_TOS_AGREED="1")

# Use the GPU when torch reports one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the XTTS v2 model (constructed without a device argument), then move
# it onto the selected device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

@spaces.GPU()  # Removed enable_queue=True since it's now always True
def clone(text, audio, language="en"):
    """Synthesize ``text`` in the voice of the reference recording.

    Args:
        text: Text to speak. Must contain at least one non-whitespace
            character.
        audio: Filesystem path of the reference voice sample; forwarded to
            the model as ``speaker_wav``.
        language: Language code forwarded to the model. Defaults to ``"en"``,
            the value this function previously hard-coded.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was supplied.
    """
    # Fail early with a message the UI can display instead of letting the
    # model raise an opaque error on missing input.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio:
        raise gr.Error("Please upload a reference voice sample.")

    # Reserve a unique output file per call so overlapping requests cannot
    # overwrite each other's result (a single shared "./output.wav" could).
    # delete=False: the file has to outlive this function so the caller can
    # read it; it is not removed here.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        speaker_wav=audio,
        language=language,
        file_path=output_path,
    )
    return output_path

# Styling for the label of the reference-audio upload (elem_id="audio_upload").
# Supplied through the `css` argument of gr.Blocks, Gradio's supported hook
# for custom CSS, instead of a <style> tag embedded in a Markdown component.
CUSTOM_CSS = """
#audio_upload > label {
    background-color: #14b8a6;
    color: white;
    padding: 10px;
    border-radius: 5px;
    cursor: pointer;
}
#audio_upload > label:hover {
    background-color: #0d9488;
}
"""

# Define the UI using Gradio Blocks with enhanced styling
with gr.Blocks(
    title="Advanced Voice Clone",
    theme=gr.themes.Soft(primary_hue="teal"),
    css=CUSTOM_CSS,
) as demo:
    # Header: app title, attribution and license notice.
    gr.Markdown(
        """
        # 🎤 Voice Clone

        **by Vortex**

        This application uses the **xtts_v2** model for voice cloning.  
        *Non-commercial use only.*

        [Coqui Public Model License](https://coqui.ai/cpml) | 
 
        ---
        """
    )

    with gr.Row():
        # Left column: the text to speak, the reference voice, and the trigger.
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Enter Text",
                placeholder="Type the text you want to clone...",
                lines=4
            )
            audio_input = gr.Audio(
                type="filepath",
                label="Upload Reference Voice",
                elem_id="audio_upload"
            )
            clone_button = gr.Button("Clone Voice", variant="primary")

        # Right column: read-only player for the generated audio.
        with gr.Column(scale=1):
            output_audio = gr.Audio(
                type="filepath",
                label="Cloned Voice Output",
                interactive=False
            )

    # Footer.
    gr.Markdown(
        """
        ---
        ❤️ If you find this tool useful, please consider giving it a thumbs up!
        """
    )

    # Connect the button to the function: the textbox value and the uploaded
    # file path go in, the returned path is shown in the output player.
    clone_button.click(
        clone,
        inputs=[text_input, audio_input],
        outputs=output_audio,
        queue=True
    )

# Start the Gradio server for the app, bound to host 0.0.0.0 on port 7860.
launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
demo.launch(**launch_options)