Spaces:
Running
Running
File size: 4,523 Bytes
068b7da 6d72e83 f274110 cd428c2 69776ee ce85940 69776ee 068b7da 6d72e83 609ffca f274110 fc7b0b3 068b7da f274110 fc7b0b3 cad3da3 f274110 6d72e83 a400d2d f274110 6d72e83 f274110 068b7da 6d72e83 f274110 0c8dcd2 a400d2d 068b7da 6d72e83 84607d1 068b7da 0c8dcd2 e185191 068b7da 6d72e83 068b7da a400d2d f274110 6d72e83 f274110 6d72e83 f274110 a400d2d e185191 fc7b0b3 84607d1 a400d2d e185191 068b7da cad3da3 18bc992 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import os
import tempfile
from datetime import datetime

import gradio as gr
import librosa
import requests
import torch
from scipy.io import wavfile
from TTS.api import TTS

# Import locally stored models
import import_local_tts_models
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the FreeVC voice-conversion model once at import time and move it to
# the chosen device so every request reuses the same instance.
tts = TTS(
    model_name="voice_conversion_models/multilingual/vctk/freevc24",
    progress_bar=False,
).to(device)
def convert_audio_to_wav(file_path):
    """Decode an audio file with librosa and re-save it as a WAV file.

    Args:
        file_path: Path of the source audio, in any format that
            ``librosa.load`` can decode (wav, mp3, etc.).

    Returns:
        Path of a newly created temporary ``.wav`` file containing the
        decoded audio at its native sample rate. The file is not deleted
        here; the caller is responsible for removing it when done.
    """
    # sr=None keeps the file's own sample rate instead of resampling.
    audio, sr = librosa.load(file_path, sr=None)

    # Every call gets its own output file. A single fixed name would let a
    # second conversion (e.g. source and target both uploaded as mp3)
    # overwrite the result of the first one.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    # librosa.output.write_wav no longer exists (removed in librosa 0.8),
    # so the samples are written with SciPy instead.
    wavfile.write(output_path, int(sr), audio)
    return output_path
def upload_to_file_io(file_path, timeout=30):
    """Upload a file to file.io and return the temporary link.

    Args:
        file_path: Path of the local file to upload.
        timeout: Seconds to wait for file.io before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        The value of the ``link`` field from the JSON response, or ``None``
        when the request fails, the status code is not 200, or the response
        body is not a JSON object.
    """
    url = "https://file.io"
    try:
        with open(file_path, "rb") as f:
            response = requests.post(url, files={"file": f}, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported as "no link".
        return None

    if response.status_code != 200:
        return None

    try:
        payload = response.json()
    except ValueError:
        # The body was not valid JSON.
        return None

    if not isinstance(payload, dict):
        return None
    return payload.get("link")
def voice_conversion(input_audio, target_voice, uploaded_target_voice):
    """Convert the voice in ``input_audio`` to the selected target voice.

    Args:
        input_audio: File path of the source recording, or ``None`` when
            nothing was recorded or uploaded.
        target_voice: File name of an example voice inside ``Examples/``.
            Only used when ``uploaded_target_voice`` is ``None``.
        uploaded_target_voice: Optional file path of a user-supplied target
            voice; takes precedence over ``target_voice``.

    Returns:
        A ``(output_path, None)`` tuple: the path of the converted WAV file
        and an empty error slot (failures are reported via ``gr.Error``).

    Raises:
        gr.Error: If no input audio was given, the input is longer than
            2 minutes, or the selected target voice file cannot be found.
    """
    if not input_audio:
        raise gr.Error("Error: No input audio provided.")

    # Check audio duration (always enforce the 2-minute limit).
    # NOTE(review): newer librosa releases prefer `path=` over `filename=`;
    # confirm against the librosa version this app is pinned to.
    duration = librosa.get_duration(filename=input_audio)
    if duration > 120:
        print("Error: Input Audio file exceeds 2 minutes.")
        raise gr.Error("Error: Input Audio file exceeds 2 minutes.")
    if duration > 30:
        gr.Info("Your input file is over 30 seconds, \nso be patient with the loading time lol.")

    # WAV files created here by convert_audio_to_wav; removed once done.
    temp_files = []
    try:
        # A user-uploaded target voice wins over the example selection.
        if uploaded_target_voice is not None:
            target_voice_path = uploaded_target_voice
            if not uploaded_target_voice.lower().endswith(".wav"):
                target_voice_path = convert_audio_to_wav(uploaded_target_voice)
                temp_files.append(target_voice_path)
        else:
            target_voice_path = os.path.join("Examples", target_voice or "")
            if not target_voice or not os.path.isfile(target_voice_path):
                # Raised rather than returned so the user actually sees it.
                raise gr.Error("Error: Target voice file not found.")

        # Convert input audio to wav if necessary.
        if not input_audio.lower().endswith(".wav"):
            input_audio = convert_audio_to_wav(input_audio)
            temp_files.append(input_audio)

        # Use a unique output file per request so a later request cannot
        # overwrite a result that is still being served.
        fd, output_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        # Perform voice conversion.
        tts.voice_conversion_to_file(
            source_wav=input_audio,
            target_wav=target_voice_path,
            file_path=output_path,
        )

        # Upload input audio to file.io and log the link (internal testing
        # only; remove once public).
        # NOTE(review): this sends the user's audio to a third-party host.
        # A failure here must not discard an already-successful conversion.
        try:
            input_file_link = upload_to_file_io(input_audio)
        except (requests.RequestException, OSError, ValueError):
            input_file_link = None
        if input_file_link:
            print(f"Input file uploaded to: {input_file_link}")
        else:
            print("Error uploading the input file to file.io")
    finally:
        for temp_path in temp_files:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

    return output_path, None
# Get examples from Examples folder
examples_folder = "Examples/"
# os.listdir returns names in arbitrary order; sorting keeps the list (and
# whichever entry comes first) stable between runs. The extension match is
# case-insensitive so files named e.g. "voice.WAV" are listed too.
example_files = sorted(
    f for f in os.listdir(examples_folder) if f.lower().endswith(".wav")
)
# Define Gradio Interface
# NOTE(review): the nesting of components inside the rows below is
# reconstructed from the statement order; confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## Voice Conversion using Coqui TTS")

    with gr.Row():
        input_audio = gr.Audio(
            label="Record or Upload Your Voice Max input length of 2 minutes.",
            type="filepath",
        )
        target_voice = gr.Dropdown(
            choices=example_files,
            label="Select Target Voice from Examples",
            # An empty example list must not crash the app at start-up.
            value=example_files[0] if example_files else None,
            info="Located in Examples/ folder",
        )
        uploaded_target_voice = gr.Audio(
            label="Or Upload Your Own Target Voice",
            type="filepath",
        )

    with gr.Row():
        play_button = gr.Button("Preview Selected Target Voice")
        preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")

    convert_button = gr.Button("Convert Voice")
    output_audio = gr.Audio(label="Converted Voice", type="filepath")
    # Second output slot of the conversion handler; hidden from the user.
    error_message = gr.Textbox(label="Error Message", visible=False)

    def preview_target_voice(selected_target_voice):
        """Return the path of the selected example voice for playback.

        Returns ``None`` when nothing is selected, which leaves the preview
        player empty instead of failing on a missing file name.
        """
        if not selected_target_voice:
            return None
        return os.path.join(examples_folder, selected_target_voice)

    # Preview button for listening to the selected target voice from examples.
    play_button.click(
        preview_target_voice,
        inputs=[target_voice],
        outputs=preview_audio,
    )

    # Conversion process with both audio and error outputs.
    convert_button.click(
        voice_conversion,
        inputs=[input_audio, target_voice, uploaded_target_voice],
        outputs=[output_audio, error_message],
    )
# Turn on request queuing, then start the app.
# For a public share link, call demo.launch(share=True) instead.
demo.queue()
demo.launch()
|