Spaces:
Sleeping
Sleeping
import os, sys | |
import gradio as gr | |
import regex as re | |
import json | |
import shutil | |
import datetime | |
import random | |
from core import ( | |
run_tts_script, | |
) | |
from assets.i18n.i18n import I18nAuto | |
i18n = I18nAuto() | |
now_dir = os.getcwd() | |
sys.path.append(now_dir) | |
model_root = os.path.join(now_dir, "logs") | |
audio_root = os.path.join(now_dir, "assets", "audios") | |
model_root_relative = os.path.relpath(model_root, now_dir) | |
audio_root_relative = os.path.relpath(audio_root, now_dir) | |
sup_audioext = { | |
"wav", | |
"mp3", | |
"flac", | |
"ogg", | |
"opus", | |
"m4a", | |
"mp4", | |
"aac", | |
"alac", | |
"wma", | |
"aiff", | |
"webm", | |
"ac3", | |
} | |
names = [ | |
os.path.join(root, file) | |
for root, _, files in os.walk(model_root_relative, topdown=False) | |
for file in files | |
if ( | |
file.endswith((".pth", ".onnx")) | |
and not (file.startswith("G_") or file.startswith("D_")) | |
) | |
] | |
indexes_list = [ | |
os.path.join(root, name) | |
for root, _, files in os.walk(model_root_relative, topdown=False) | |
for name in files | |
if name.endswith(".index") and "trained" not in name | |
] | |
audio_paths = [ | |
os.path.join(root, name) | |
for root, _, files in os.walk(audio_root_relative, topdown=False) | |
for name in files | |
if name.endswith(tuple(sup_audioext)) | |
and root == audio_root_relative | |
and "_output" not in name | |
] | |
def change_choices(): | |
names = [ | |
os.path.join(root, file) | |
for root, _, files in os.walk(model_root_relative, topdown=False) | |
for file in files | |
if ( | |
file.endswith((".pth", ".onnx")) | |
and not (file.startswith("G_") or file.startswith("D_")) | |
) | |
] | |
indexes_list = [ | |
os.path.join(root, name) | |
for root, _, files in os.walk(model_root_relative, topdown=False) | |
for name in files | |
if name.endswith(".index") and "trained" not in name | |
] | |
audio_paths = [ | |
os.path.join(root, name) | |
for root, _, files in os.walk(audio_root_relative, topdown=False) | |
for name in files | |
if name.endswith(tuple(sup_audioext)) | |
and root == audio_root_relative | |
and "_output" not in name | |
] | |
return ( | |
{"choices": sorted(names), "__type__": "update"}, | |
{"choices": sorted(indexes_list), "__type__": "update"}, | |
{"choices": sorted(audio_paths), "__type__": "update"}, | |
) | |
def get_indexes(): | |
indexes_list = [ | |
os.path.join(dirpath, filename) | |
for dirpath, _, filenames in os.walk(model_root_relative) | |
for filename in filenames | |
if filename.endswith(".index") and "trained" not in filename | |
] | |
return indexes_list if indexes_list else "" | |
def match_index(model_file: str) -> tuple: | |
model_files_trip = re.sub(r"\.pth|\.onnx$", "", model_file) | |
model_file_name = os.path.split(model_files_trip)[ | |
-1 | |
] # Extract only the name, not the directory | |
# Check if the sid0strip has the specific ending format _eXXX_sXXX | |
if re.match(r".+_e\d+_s\d+$", model_file_name): | |
base_model_name = model_file_name.rsplit("_", 2)[0] | |
else: | |
base_model_name = model_file_name | |
sid_directory = os.path.join(model_root_relative, base_model_name) | |
directories_to_search = [sid_directory] if os.path.exists(sid_directory) else [] | |
directories_to_search.append(model_root_relative) | |
matching_index_files = [] | |
for directory in directories_to_search: | |
for filename in os.listdir(directory): | |
if filename.endswith(".index") and "trained" not in filename: | |
# Condition to match the name | |
name_match = any( | |
name.lower() in filename.lower() | |
for name in [model_file_name, base_model_name] | |
) | |
# If in the specific directory, it's automatically a match | |
folder_match = directory == sid_directory | |
if name_match or folder_match: | |
index_path = os.path.join(directory, filename) | |
if index_path in indexes_list: | |
matching_index_files.append( | |
( | |
index_path, | |
os.path.getsize(index_path), | |
" " not in filename, | |
) | |
) | |
if matching_index_files: | |
# Sort by favoring files without spaces and by size (largest size first) | |
matching_index_files.sort(key=lambda x: (-x[2], -x[1])) | |
best_match_index_path = matching_index_files[0][0] | |
return best_match_index_path | |
return "" | |
def save_to_wav(record_button): | |
if record_button is None: | |
pass | |
else: | |
path_to_file = record_button | |
new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav" | |
target_path = os.path.join(audio_root_relative, os.path.basename(new_name)) | |
shutil.move(path_to_file, target_path) | |
return target_path | |
def save_to_wav2(upload_audio): | |
file_path = upload_audio | |
target_path = os.path.join(audio_root_relative, os.path.basename(file_path)) | |
if os.path.exists(target_path): | |
os.remove(target_path) | |
shutil.copy(file_path, target_path) | |
return target_path | |
def delete_outputs(): | |
for root, _, files in os.walk(audio_root_relative, topdown=False): | |
for name in files: | |
if name.endswith(tuple(sup_audioext)) and name.__contains__("_output"): | |
os.remove(os.path.join(root, name)) | |
gr.Info(f"Outputs cleared!") | |
def tts_tab(): | |
default_weight = random.choice(names) if names else "" | |
with gr.Row(): | |
with gr.Row(): | |
model_file = gr.Dropdown( | |
label=i18n("Voice Model"), | |
choices=sorted(names, key=lambda path: os.path.getsize(path)), | |
interactive=True, | |
value=default_weight, | |
allow_custom_value=True, | |
) | |
best_default_index_path = match_index(model_file.value) | |
index_file = gr.Dropdown( | |
label=i18n("Index File"), | |
choices=get_indexes(), | |
value=best_default_index_path, | |
interactive=True, | |
allow_custom_value=True, | |
) | |
with gr.Column(): | |
refresh_button = gr.Button(i18n("Refresh")) | |
unload_button = gr.Button(i18n("Unload Voice")) | |
unload_button.click( | |
fn=lambda: ({"value": "", "__type__": "update"}), | |
inputs=[], | |
outputs=[model_file], | |
) | |
model_file.select( | |
fn=match_index, | |
inputs=[model_file], | |
outputs=[index_file], | |
) | |
json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json") | |
with open(json_path, "r") as file: | |
tts_voices_data = json.load(file) | |
short_names = [voice.get("ShortName", "") for voice in tts_voices_data] | |
tts_voice = gr.Dropdown( | |
label=i18n("TTS Voices"), | |
choices=short_names, | |
interactive=True, | |
value=None, | |
) | |
tts_text = gr.Textbox( | |
label=i18n("Text to Synthesize"), | |
placeholder=i18n("Enter text to synthesize"), | |
lines=3, | |
) | |
with gr.Accordion(i18n("Advanced Settings"), open=False): | |
with gr.Column(): | |
output_tts_path = gr.Textbox( | |
label=i18n("Output Path for TTS Audio"), | |
placeholder=i18n("Enter output path"), | |
value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"), | |
interactive=True, | |
) | |
output_rvc_path = gr.Textbox( | |
label=i18n("Output Path for RVC Audio"), | |
placeholder=i18n("Enter output path"), | |
value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"), | |
interactive=True, | |
) | |
pitch = gr.Slider( | |
minimum=-24, | |
maximum=24, | |
step=1, | |
label=i18n("Pitch"), | |
value=0, | |
interactive=True, | |
) | |
filter_radius = gr.Slider( | |
minimum=0, | |
maximum=7, | |
label=i18n( | |
"If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness" | |
), | |
value=3, | |
step=1, | |
interactive=True, | |
) | |
index_rate = gr.Slider( | |
minimum=0, | |
maximum=1, | |
label=i18n("Search Feature Ratio"), | |
value=0.75, | |
interactive=True, | |
) | |
hop_length = gr.Slider( | |
minimum=1, | |
maximum=512, | |
step=1, | |
label=i18n("Hop Length"), | |
value=128, | |
interactive=True, | |
) | |
with gr.Column(): | |
f0method = gr.Radio( | |
label=i18n("Pitch extraction algorithm"), | |
choices=[ | |
"pm", | |
"harvest", | |
"dio", | |
"crepe", | |
"crepe-tiny", | |
"rmvpe", | |
], | |
value="rmvpe", | |
interactive=True, | |
) | |
convert_button1 = gr.Button(i18n("Convert")) | |
with gr.Row(): # Defines output info + output audio download after conversion | |
vc_output1 = gr.Textbox(label=i18n("Output Information")) | |
vc_output2 = gr.Audio(label=i18n("Export Audio")) | |
refresh_button.click( | |
fn=change_choices, | |
inputs=[], | |
outputs=[model_file, index_file], | |
) | |
convert_button1.click( | |
fn=run_tts_script, | |
inputs=[ | |
tts_text, | |
tts_voice, | |
pitch, | |
filter_radius, | |
index_rate, | |
hop_length, | |
f0method, | |
output_tts_path, | |
output_rvc_path, | |
model_file, | |
index_file, | |
], | |
outputs=[vc_output1, vc_output2], | |
) | |