Spaces:

TIMBOVILL
/

Applio-V3-HF

Running

App Files Files Community

Applio-V3-HF / tabs /tts /tts.py

PlayerBPlaytime

Upload 125 files

16de183 verified about 1 year ago

raw

history blame

10.2 kB

	import os, sys
	import gradio as gr
	import regex as re
	import json
	import shutil
	import datetime
	import random

	from core import (
	run_tts_script,
	)

	from assets.i18n.i18n import I18nAuto

	# Translation helper: the UI labels and placeholders built in this module
	# are wrapped in i18n(...) before being handed to Gradio.
	i18n = I18nAuto()

	# The process working directory is treated as the project root and is also
	# appended to sys.path.
	now_dir = os.getcwd()
	sys.path.append(now_dir)

	# Absolute locations of the model folder and the audio folder.
	model_root = os.path.join(now_dir, "logs")
	audio_root = os.path.join(now_dir, "assets", "audios")

	# The same two folders expressed relative to now_dir; every scan in this
	# module walks these relative paths.
	model_root_relative = os.path.relpath(model_root, now_dir)
	audio_root_relative = os.path.relpath(audio_root, now_dir)

	# File-name endings accepted as audio by the scans in this module.
	sup_audioext = {
	    "wav",
	    "mp3",
	    "flac",
	    "ogg",
	    "opus",
	    "m4a",
	    "mp4",
	    "aac",
	    "alac",
	    "wma",
	    "aiff",
	    "webm",
	    "ac3",
	}

	# Voice model files (.pth / .onnx) anywhere under the model folder. Files
	# whose name starts with "G_" or "D_" are skipped (presumably training
	# checkpoints rather than usable voices -- confirm against the trainer).
	names = [
	    os.path.join(folder, entry)
	    for folder, _, entries in os.walk(model_root_relative, topdown=False)
	    for entry in entries
	    if entry.endswith((".pth", ".onnx")) and not entry.startswith(("G_", "D_"))
	]

	# Index files anywhere under the model folder, excluding any whose name
	# contains "trained".
	indexes_list = [
	    os.path.join(folder, entry)
	    for folder, _, entries in os.walk(model_root_relative, topdown=False)
	    for entry in entries
	    if "trained" not in entry and entry.endswith(".index")
	]

	# Audio files sitting directly in the audio folder (sub-folders are walked
	# but filtered out), excluding anything with "_output" in its name.
	audio_paths = [
	    os.path.join(folder, entry)
	    for folder, _, entries in os.walk(audio_root_relative, topdown=False)
	    for entry in entries
	    if folder == audio_root_relative
	    and "_output" not in entry
	    and entry.endswith(tuple(sup_audioext))
	]


	def change_choices():
	    """Rescan the model and audio folders and build dropdown updates.

	    Returns:
	        A 3-tuple of Gradio update dicts, in this order: voice model files,
	        index files, input audio files. Each dict carries the sorted paths
	        under "choices".
	    """
	    model_files = []
	    index_files = []
	    # Both model and index files live under the same tree, but the original
	    # walks it once per list; keep two walks so ordering stays identical.
	    for folder, _, entries in os.walk(model_root_relative, topdown=False):
	        for entry in entries:
	            if entry.startswith("G_") or entry.startswith("D_"):
	                continue
	            if entry.endswith((".pth", ".onnx")):
	                model_files.append(os.path.join(folder, entry))

	    for folder, _, entries in os.walk(model_root_relative, topdown=False):
	        for entry in entries:
	            if entry.endswith(".index") and "trained" not in entry:
	                index_files.append(os.path.join(folder, entry))

	    audio_files = []
	    for folder, _, entries in os.walk(audio_root_relative, topdown=False):
	        # Only files sitting directly in the audio folder are offered.
	        if folder != audio_root_relative:
	            continue
	        for entry in entries:
	            if "_output" in entry:
	                continue
	            if entry.endswith(tuple(sup_audioext)):
	                audio_files.append(os.path.join(folder, entry))

	    return tuple(
	        {"choices": sorted(found), "__type__": "update"}
	        for found in (model_files, index_files, audio_files)
	    )


	def get_indexes():
	    """List the index files currently present under the model folder.

	    Returns:
	        A list of paths to ``.index`` files whose name does not contain
	        "trained", or the empty string when no such file exists.
	    """
	    found = []
	    for folder, _, entries in os.walk(model_root_relative):
	        for entry in entries:
	            if "trained" in entry:
	                continue
	            if entry.endswith(".index"):
	                found.append(os.path.join(folder, entry))

	    # An empty result is reported as "" rather than [].
	    return found or ""


	def match_index(model_file: str) -> str:
	    """Pick the index file that best matches a voice model file.

	    The model's extension is removed and, when the remaining name ends in
	    ``_e<digits>_s<digits>``, that suffix is removed too. Index files are
	    then looked up in ``<model folder>/<base name>/`` (every index there
	    counts as a match) and in the model folder itself (only indexes whose
	    name contains the model name, case-insensitively).

	    Args:
	        model_file: Path or file name of the selected ``.pth`` / ``.onnx``
	            model. May be empty.

	    Returns:
	        Path of the best candidate, preferring file names without spaces
	        and then the largest file, or ``""`` when nothing matches.
	    """
	    if not model_file:
	        # Nothing selected: an empty name would otherwise "match" every index.
	        return ""

	    # The pipe must be a real alternation; the previous pattern escaped it
	    # and therefore never removed the extension.
	    stem = re.sub(r"\.(?:pth|onnx)$", "", model_file)
	    model_file_name = os.path.basename(stem)  # name only, no directory

	    # Names ending in _eXXX_sXXX lose that suffix to obtain the base name.
	    if re.match(r".+_e\d+_s\d+$", model_file_name):
	        base_model_name = model_file_name.rsplit("_", 2)[0]
	    else:
	        base_model_name = model_file_name

	    if not base_model_name:
	        return ""

	    sid_directory = os.path.join(model_root_relative, base_model_name)
	    # Skip folders that are missing so a fresh install does not crash here.
	    directories_to_search = [
	        directory
	        for directory in (sid_directory, model_root_relative)
	        if os.path.isdir(directory)
	    ]

	    wanted_names = {model_file_name.lower(), base_model_name.lower()}
	    candidates = []

	    for directory in directories_to_search:
	        # Anything inside the model's own folder is automatically a match.
	        folder_match = directory == sid_directory
	        for filename in os.listdir(directory):
	            if not filename.endswith(".index") or "trained" in filename:
	                continue
	            lowered = filename.lower()
	            if folder_match or any(name in lowered for name in wanted_names):
	                index_path = os.path.join(directory, filename)
	                # The folder is read live, so index files added after
	                # start-up are eligible too.
	                candidates.append(
	                    (index_path, os.path.getsize(index_path), " " not in filename)
	                )

	    if not candidates:
	        return ""

	    # Prefer names without spaces, then the largest file; the first such
	    # candidate wins on ties.
	    best_path, _, _ = min(candidates, key=lambda item: (not item[2], -item[1]))
	    return best_path


	def save_to_wav(record_button):
	    """Move a recorded file into the audio folder under a timestamped name.

	    Args:
	        record_button: Path of the recorded file, or None when there is no
	            recording.

	    Returns:
	        The new path (``<audio folder>/<YYYY-MM-DD_HH-MM-SS>.wav``), or None
	        when ``record_button`` is None.
	    """
	    if record_button is None:
	        return None

	    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
	    destination = os.path.join(
	        audio_root_relative, os.path.basename(stamp + ".wav")
	    )
	    shutil.move(record_button, destination)
	    return destination


	def save_to_wav2(upload_audio):
	    """Copy an uploaded file into the audio folder, replacing any namesake.

	    Args:
	        upload_audio: Path of the uploaded file, or None when nothing is
	            uploaded.

	    Returns:
	        The path of the copy inside the audio folder, or None when
	        ``upload_audio`` is None (mirrors ``save_to_wav``).
	    """
	    if upload_audio is None:
	        # Without this guard os.path.basename(None) raises TypeError.
	        return None

	    file_path = upload_audio
	    target_path = os.path.join(audio_root_relative, os.path.basename(file_path))

	    if os.path.exists(target_path):
	        if os.path.samefile(file_path, target_path):
	            # The upload already is the target; removing it first would
	            # delete the only copy.
	            return target_path
	        os.remove(target_path)

	    shutil.copy(file_path, target_path)
	    return target_path


	def delete_outputs():
	    """Delete generated audio files and notify the user.

	    Every file under the audio folder (sub-folders included) whose name
	    contains "_output" and ends with a supported audio ending is removed,
	    then a Gradio info message is shown.
	    """
	    audio_endings = tuple(sup_audioext)
	    for folder, _, entries in os.walk(audio_root_relative, topdown=False):
	        for entry in entries:
	            if "_output" in entry and entry.endswith(audio_endings):
	                os.remove(os.path.join(folder, entry))
	    gr.Info("Outputs cleared!")


	def tts_tab():
	    """Build the text-to-speech tab inside the enclosing Gradio Blocks.

	    Creates the voice-model / index-file pickers, the TTS voice and text
	    inputs, the advanced conversion settings and the output widgets, then
	    wires the Refresh, Unload Voice and Convert buttons. Convert calls
	    ``run_tts_script`` with the widget values in the order listed below.

	    Raises:
	        OSError: if ``rvc/lib/tools/tts_voices.json`` cannot be opened.
	    """
	    # A random available model is preselected; empty when none is installed.
	    default_weight = random.choice(names) if names else ""
	    with gr.Row():
	        with gr.Row():
	            model_file = gr.Dropdown(
	                label=i18n("Voice Model"),
	                choices=sorted(names, key=os.path.getsize),
	                interactive=True,
	                value=default_weight,
	                allow_custom_value=True,
	            )
	            # Preselect the index that matches the preselected model.
	            best_default_index_path = match_index(model_file.value)
	            index_file = gr.Dropdown(
	                label=i18n("Index File"),
	                choices=get_indexes(),
	                value=best_default_index_path,
	                interactive=True,
	                allow_custom_value=True,
	            )
	        with gr.Column():
	            refresh_button = gr.Button(i18n("Refresh"))
	            unload_button = gr.Button(i18n("Unload Voice"))

	            # Unloading only clears the model dropdown.
	            unload_button.click(
	                fn=lambda: ({"value": "", "__type__": "update"}),
	                inputs=[],
	                outputs=[model_file],
	            )

	            # Choosing a model re-runs the index matching.
	            model_file.select(
	                fn=match_index,
	                inputs=[model_file],
	                outputs=[index_file],
	            )

	    json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
	    # JSON is UTF-8 by specification; do not depend on the platform's
	    # default encoding.
	    with open(json_path, "r", encoding="utf-8") as file:
	        tts_voices_data = json.load(file)

	    short_names = [voice.get("ShortName", "") for voice in tts_voices_data]

	    tts_voice = gr.Dropdown(
	        label=i18n("TTS Voices"),
	        choices=short_names,
	        interactive=True,
	        value=None,
	    )

	    tts_text = gr.Textbox(
	        label=i18n("Text to Synthesize"),
	        placeholder=i18n("Enter text to synthesize"),
	        lines=3,
	    )

	    with gr.Accordion(i18n("Advanced Settings"), open=False):
	        with gr.Column():
	            output_tts_path = gr.Textbox(
	                label=i18n("Output Path for TTS Audio"),
	                placeholder=i18n("Enter output path"),
	                value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
	                interactive=True,
	            )

	            output_rvc_path = gr.Textbox(
	                label=i18n("Output Path for RVC Audio"),
	                placeholder=i18n("Enter output path"),
	                value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
	                interactive=True,
	            )

	            pitch = gr.Slider(
	                minimum=-24,
	                maximum=24,
	                step=1,
	                label=i18n("Pitch"),
	                value=0,
	                interactive=True,
	            )
	            filter_radius = gr.Slider(
	                minimum=0,
	                maximum=7,
	                label=i18n(
	                    "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
	                ),
	                value=3,
	                step=1,
	                interactive=True,
	            )
	            index_rate = gr.Slider(
	                minimum=0,
	                maximum=1,
	                label=i18n("Search Feature Ratio"),
	                value=0.75,
	                interactive=True,
	            )
	            hop_length = gr.Slider(
	                minimum=1,
	                maximum=512,
	                step=1,
	                label=i18n("Hop Length"),
	                value=128,
	                interactive=True,
	            )
	        with gr.Column():
	            f0method = gr.Radio(
	                label=i18n("Pitch extraction algorithm"),
	                choices=[
	                    "pm",
	                    "harvest",
	                    "dio",
	                    "crepe",
	                    "crepe-tiny",
	                    "rmvpe",
	                ],
	                value="rmvpe",
	                interactive=True,
	            )

	    convert_button1 = gr.Button(i18n("Convert"))

	    with gr.Row():  # Defines output info + output audio download after conversion
	        vc_output1 = gr.Textbox(label=i18n("Output Information"))
	        vc_output2 = gr.Audio(label=i18n("Export Audio"))

	    # change_choices() returns three updates (models, indexes, audios) but
	    # this tab has only two matching dropdowns, so pass on the first two.
	    refresh_button.click(
	        fn=lambda: change_choices()[:2],
	        inputs=[],
	        outputs=[model_file, index_file],
	    )
	    convert_button1.click(
	        fn=run_tts_script,
	        inputs=[
	            tts_text,
	            tts_voice,
	            pitch,
	            filter_radius,
	            index_rate,
	            hop_length,
	            f0method,
	            output_tts_path,
	            output_rvc_path,
	            model_file,
	            index_file,
	        ],
	        outputs=[vc_output1, vc_output2],
	    )