Voice-Conversion

Sleeping

App Files Files Community

Voice-Conversion / app.py

drewThomasson

Update app.py

ce85940 verified 3 months ago

raw

history blame contribute delete

4.52 kB

	import gradio as gr
	import torch
	from TTS.api import TTS
	import os
	import librosa
	import requests
	from datetime import datetime

	# Import the locally stored TTS models
	import import_local_tts_models

	# Pick the compute device: use CUDA when torch reports it as available,
	# otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Build the FreeVC24 voice-conversion model once at start-up (progress bar
	# disabled), then move it onto the selected device.
	tts = TTS(
	    model_name="voice_conversion_models/multilingual/vctk/freevc24",
	    progress_bar=False,
	)
	tts = tts.to(device)

	def convert_audio_to_wav(file_path):
	    """Decode an audio file with librosa and save it as a new WAV file.

	    Args:
	        file_path: Path of the audio file to decode (wav, mp3, etc.).

	    Returns:
	        Path of the WAV file that was written. Each call creates its own
	        temporary file; the file is not deleted by this function.
	    """
	    # Function-scope imports so this block brings its own dependencies.
	    import tempfile

	    from scipy.io import wavfile

	    # sr=None keeps the file's native sampling rate instead of resampling.
	    audio, sr = librosa.load(file_path, sr=None)

	    # A single fixed output name meant a second conversion overwrote the
	    # first one, so a converted target voice and a converted input ended up
	    # being the very same file. A unique name per call avoids that.
	    fd, output_path = tempfile.mkstemp(prefix="temp_input_", suffix=".wav")
	    os.close(fd)

	    # librosa.output.write_wav was removed in librosa 0.8, so write the
	    # samples with SciPy instead.
	    wavfile.write(output_path, sr, audio)
	    return output_path

	def upload_to_file_io(file_path):
	    """Upload a file to file.io and return the temporary link.

	    Args:
	        file_path: Path of the file to upload.

	    Returns:
	        The ``link`` value from the JSON reply, or None when the upload
	        fails (connection error, timeout, non-200 status or a reply that is
	        not valid JSON).
	    """
	    url = "https://file.io"
	    try:
	        with open(file_path, "rb") as f:
	            # Without a timeout a stalled connection would block the caller
	            # indefinitely.
	            response = requests.post(url, files={"file": f}, timeout=30)
	        if response.status_code != 200:
	            return None
	        return response.json().get("link")
	    except (requests.RequestException, ValueError):
	        # Failure is already reported to callers as None, so network and
	        # JSON-decoding errors follow the same convention.
	        return None

	def voice_conversion(input_audio, target_voice, uploaded_target_voice):
	    """Convert the voice in ``input_audio`` to the chosen target voice.

	    Args:
	        input_audio: File path of the source recording (2 minutes at most).
	        target_voice: File name of a reference voice inside ``Examples``;
	            only used when no target voice was uploaded.
	        uploaded_target_voice: File path of a user-supplied reference voice,
	            or None to use ``target_voice``.

	    Returns:
	        ``(output_path, None)`` on success, or ``(None, message)`` when the
	        selected example voice cannot be found.

	    Raises:
	        gr.Error: If no input audio was provided or it exceeds 2 minutes.
	    """
	    output_path = "output.wav"

	    # The button can be clicked before anything was recorded or uploaded;
	    # fail with a readable message instead of an error from librosa.
	    if not input_audio:
	        raise gr.Error("Error: Please record or upload an input audio file.")

	    # Check audio duration (always enforce the 2-minute limit)
	    # NOTE(review): newer librosa releases may expect `path=` instead of
	    # `filename=` here; confirm against the installed version.
	    duration = librosa.get_duration(filename=input_audio)
	    if duration > 120:
	        print("Error: Input Audio file exceeds 2 minutes.")
	        raise gr.Error("Error: Input Audio file exceeds 2 minutes.")
	    elif duration > 30:
	        gr.Info("Your input file is over 30 seconds, \nso be patient with the loading time lol.")

	    # Check if the user uploaded a target voice, otherwise use selected from examples
	    if uploaded_target_voice is not None:
	        target_voice_path = uploaded_target_voice
	        # Compare the extension case-insensitively so ".WAV" is not re-encoded.
	        if not uploaded_target_voice.lower().endswith(".wav"):
	            target_voice_path = convert_audio_to_wav(uploaded_target_voice)
	    else:
	        # An empty dropdown selection would make os.path.join raise.
	        target_voice_path = os.path.join("Examples", target_voice) if target_voice else None
	        if target_voice_path is None or not os.path.exists(target_voice_path):
	            message = "Error: Target voice file not found."
	            # The textbox this message is routed to is created hidden, so
	            # also show it as a pop-up; the return value stays the same.
	            gr.Warning(message)
	            return None, message

	    # Convert input audio to wav if necessary
	    if not input_audio.lower().endswith(".wav"):
	        input_audio = convert_audio_to_wav(input_audio)

	    # Perform voice conversion
	    tts.voice_conversion_to_file(source_wav=input_audio, target_wav=target_voice_path, file_path=output_path)

	    # Upload input audio to file.io and log the link for internal testing remove once public
	    # NOTE(review): this sends the user's recording to a third-party host.
	    try:
	        input_file_link = upload_to_file_io(input_audio)
	    except (OSError, ValueError):
	        # The upload is only a logging aid; it must not discard a
	        # conversion that already succeeded.
	        input_file_link = None
	    if input_file_link:
	        print(f"Input file uploaded to: {input_file_link}")  # Log the input file link to the terminal
	    else:
	        print("Error uploading the input file to file.io")

	    return output_path, None

	# Get examples from Examples folder
	examples_folder = "Examples/"
	# os.listdir returns names in arbitrary order; sorting keeps the dropdown
	# order and its default entry stable between runs.
	example_files = sorted(f for f in os.listdir(examples_folder) if f.endswith(".wav"))

	# Define Gradio Interface
	with gr.Blocks() as demo:
	    gr.Markdown("## Voice Conversion using Coqui TTS")

	    with gr.Row():
	        input_audio = gr.Audio(label="Record or Upload Your Voice Max input length of 2 minutes.", type="filepath")
	        target_voice = gr.Dropdown(
	            choices=example_files,
	            label="Select Target Voice from Examples",
	            # Indexing an empty list would raise IndexError at start-up when
	            # the folder holds no .wav files.
	            value=example_files[0] if example_files else None,
	            info="Located in Examples/ folder"
	        )
	        uploaded_target_voice = gr.Audio(
	            label="Or Upload Your Own Target Voice",
	            type="filepath"
	        )

	    with gr.Row():
	        play_button = gr.Button("Preview Selected Target Voice")
	        preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")

	    convert_button = gr.Button("Convert Voice")
	    output_audio = gr.Audio(label="Converted Voice", type="filepath")
	    error_message = gr.Textbox(label="Error Message", visible=False)  # Textbox for displaying errors

	    # Preview button for listening to the selected target voice from examples
	    def preview_target_voice(selected_target_voice):
	        """Return the path of the selected example voice, or None if none is selected."""
	        if not selected_target_voice:
	            return None
	        return os.path.join(examples_folder, selected_target_voice)

	    play_button.click(preview_target_voice, inputs=[target_voice], outputs=preview_audio)

	    # Conversion process with both audio and error outputs
	    convert_button.click(
	        voice_conversion,
	        inputs=[input_audio, target_voice, uploaded_target_voice],
	        outputs=[output_audio, error_message]  # Outputs include audio and error
	    )

	# Launch with request queuing enabled. To also get a public share link,
	# launch with share=True instead:
	#demo.launch(share=True)
	demo.queue().launch()