|
import os, sys |
|
import gradio as gr |
|
import regex as re |
|
import shutil |
|
import datetime |
|
import random |
|
|
|
from core import ( |
|
run_infer_script, |
|
run_batch_infer_script, |
|
) |
|
|
|
from assets.i18n.i18n import I18nAuto |
|
|
|
# Translator applied to the user-facing labels of the widgets built below.
i18n = I18nAuto()

# The current working directory anchors every path below and is also added
# to the import search path.
now_dir = os.getcwd()
sys.path.append(now_dir)

# Directory scanned for model weights and index files.
model_root = os.path.join(now_dir, "logs")
# Directory scanned for input audio and where recordings/uploads are stored.
audio_root = os.path.join(now_dir, "assets", "audios")

# File-name endings treated as audio (matched with ``str.endswith``).
sup_audioext = {
    "wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4",
    "aac", "alac", "wma", "aiff", "webm", "ac3",
}

# Snapshot, taken at import time, of every ``.pth`` / ``.onnx`` file found
# anywhere under ``model_root``.
names = [
    os.path.join(folder, model_name)
    for folder, _, entries in os.walk(model_root, topdown=False)
    for model_name in entries
    if model_name.endswith((".pth", ".onnx"))
]

# Snapshot of every ``.index`` file under ``model_root`` whose name does not
# contain "trained".
indexes_list = [
    os.path.join(folder, index_name)
    for folder, _, entries in os.walk(model_root, topdown=False)
    for index_name in entries
    if "trained" not in index_name and index_name.endswith(".index")
]

# Snapshot of the audio files sitting directly in ``audio_root`` (nested
# folders are walked but ignored), leaving out names containing "_output".
audio_paths = [
    os.path.join(folder, audio_name)
    for folder, _, entries in os.walk(audio_root, topdown=False)
    if folder == audio_root
    for audio_name in entries
    if "_output" not in audio_name and audio_name.endswith(tuple(sup_audioext))
]
|
|
|
|
|
def output_path_fn(input_audio_path):
    """Return the default output path for *input_audio_path*.

    The result lives in the same directory as the input and is named
    ``<input name without its last extension>_output.wav``.
    """
    folder, file_name = os.path.split(input_audio_path)
    stem, dot, _extension = file_name.rpartition(".")
    if not dot:
        # No "." in the name: the whole file name is the stem.
        stem = file_name
    return os.path.join(folder, f"{stem}_output.wav")
|
|
|
|
|
def change_choices():
    """Rescan the model and audio folders and return dropdown updates.

    Returns a 3-tuple of Gradio update dictionaries whose ``choices`` are,
    in order: the sorted model files (``.pth`` / ``.onnx``) under
    ``model_root``, the sorted ``.index`` files under ``model_root`` whose
    name does not contain "trained", and the sorted audio files located
    directly in ``audio_root`` whose name does not contain "_output".
    """

    def as_update(paths):
        return {"choices": sorted(paths), "__type__": "update"}

    model_files = []
    index_files = []
    # One pass over the model tree collects both kinds of file; the results
    # are sorted afterwards, so discovery order is irrelevant.
    for folder, _, entries in os.walk(model_root, topdown=False):
        for entry in entries:
            full_path = os.path.join(folder, entry)
            if entry.endswith((".pth", ".onnx")):
                model_files.append(full_path)
            if entry.endswith(".index") and "trained" not in entry:
                index_files.append(full_path)

    audio_suffixes = tuple(sup_audioext)
    audio_files = []
    for folder, _, entries in os.walk(audio_root, topdown=False):
        if folder != audio_root:
            # Only files directly inside ``audio_root`` are offered.
            continue
        for entry in entries:
            if "_output" not in entry and entry.endswith(audio_suffixes):
                audio_files.append(os.path.join(folder, entry))

    return (
        as_update(model_files),
        as_update(index_files),
        as_update(audio_files),
    )
|
|
|
|
|
def get_indexes():
    """Collect the index files currently present under ``model_root``.

    Returns a list with the path of every ``.index`` file whose name does
    not contain "trained", or an empty string when none is found.
    """
    found = []
    for dirpath, _, filenames in os.walk(model_root):
        for filename in filenames:
            if "trained" in filename or not filename.endswith(".index"):
                continue
            found.append(os.path.join(dirpath, filename))

    # An empty result is reported as "" rather than as an empty list.
    return found or ""
|
|
|
|
|
def match_index(model_file: str) -> str:
    """Return the index file that best matches *model_file*, or ``""``.

    The model's file name (without its ``.pth`` / ``.onnx`` extension) is
    reduced to a base name by dropping a trailing ``_e<digits>_s<digits>``
    suffix when present. Candidate ``.index`` files (names containing
    "trained" are ignored) are looked up in ``model_root/<base name>`` and
    in ``model_root`` itself. A candidate qualifies when it lives in the
    per-model folder or when its name contains the model or base name
    (case-insensitively). Among the candidates, names without spaces win,
    then the largest file.

    Args:
        model_file: Path of the selected model; may be empty or ``None``
            when no model is selected.

    Returns:
        The path of the best matching index file, or ``""`` if there is
        none.
    """
    if not model_file:
        # Nothing selected (e.g. the model was unloaded): without this guard
        # the empty name would be a substring of every index file name.
        return ""

    # Strip the extension only at the very end of the path; the alternation
    # is grouped so that "$" applies to both extensions.
    model_file_name = os.path.basename(re.sub(r"\.(?:pth|onnx)$", "", model_file))
    if not model_file_name:
        return ""

    if re.match(r".+_e\d+_s\d+$", model_file_name):
        base_model_name = model_file_name.rsplit("_", 2)[0]
    else:
        base_model_name = model_file_name

    sid_directory = os.path.join(model_root, base_model_name)
    # Only list locations that really are directories, so a missing
    # ``model_root`` or a stray file cannot make ``os.listdir`` raise.
    directories_to_search = [
        directory
        for directory in (sid_directory, model_root)
        if os.path.isdir(directory)
    ]

    wanted_names = {model_file_name.lower(), base_model_name.lower()}
    matching_index_files = []

    for directory in directories_to_search:
        folder_match = directory == sid_directory
        for filename in os.listdir(directory):
            if not filename.endswith(".index") or "trained" in filename:
                continue

            lowered = filename.lower()
            if not (folder_match or any(name in lowered for name in wanted_names)):
                continue

            index_path = os.path.join(directory, filename)
            # Check the file system directly instead of a list captured at
            # import time, so index files added later are still found.
            if os.path.isfile(index_path):
                matching_index_files.append(
                    (index_path, os.path.getsize(index_path), " " not in filename)
                )

    if not matching_index_files:
        return ""

    # Prefer names without spaces, then the largest file; ``min`` keeps the
    # first candidate on ties, like a stable sort followed by ``[0]``.
    best_match = min(matching_index_files, key=lambda item: (not item[2], -item[1]))
    return best_match[0]
|
|
|
|
|
def save_to_wav(record_button):
    """Move a recording into ``audio_root`` under a timestamped name.

    Args:
        record_button: Path of the recorded file, or ``None`` when there is
            no recording.

    Returns:
        ``None`` when *record_button* is ``None``; otherwise a tuple of the
        new file path and the output path derived from it by
        ``output_path_fn``.
    """
    if record_button is None:
        return None

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    target_path = os.path.join(audio_root, os.path.basename(timestamp + ".wav"))

    shutil.move(record_button, target_path)
    return target_path, output_path_fn(target_path)
|
|
|
|
|
def save_to_wav2(upload_audio):
    """Copy an uploaded audio file into ``audio_root``, keeping its name.

    A file already present under the same name is removed first.

    Returns:
        A tuple of the copied file's path and the output path derived from
        it by ``output_path_fn``.
    """
    destination = os.path.join(audio_root, os.path.basename(upload_audio))

    already_there = os.path.exists(destination)
    if already_there:
        os.remove(destination)

    shutil.copy(upload_audio, destination)
    suggested_output = output_path_fn(destination)
    return destination, suggested_output
|
|
|
|
|
def delete_outputs():
    """Delete generated audio files under ``audio_root`` and notify the user.

    Every file below ``audio_root`` (sub-folders included) whose name
    contains "_output" and ends with one of the ``sup_audioext`` entries is
    removed, then a Gradio info message is shown.
    """
    audio_suffixes = tuple(sup_audioext)
    for folder, _, entries in os.walk(audio_root, topdown=False):
        for entry in entries:
            if "_output" in entry and entry.endswith(audio_suffixes):
                os.remove(os.path.join(folder, entry))
    gr.Info("Outputs cleared!")
|
|
|
|
|
|
|
def inference_tab():
    """Build the inference UI (model picker, Single and Batch tabs).

    Creates the Gradio components and registers their event handlers:

    * "Refresh" reloads the model, index and audio dropdown choices.
    * "Unload Voice" clears the selected model.
    * Selecting a model fills the index dropdown via ``match_index``.
    * Uploading or recording audio stores the file and updates the audio
      dropdown and the output path.
    * Both "Clear Outputs" buttons (Single and Batch) call
      ``delete_outputs``.
    * The two "Convert" buttons call ``run_infer_script`` and
      ``run_batch_infer_script`` respectively.

    The function returns nothing. It opens no ``gr.Blocks`` context itself,
    so it is presumably meant to be called inside one owned by the caller.
    """
    # NOTE(review): the container nesting below was reconstructed from the
    # order of the statements; confirm it against the rendered layout.
    default_weight = random.choice(names) if names else ""
    with gr.Row():
        with gr.Row():
            model_file = gr.Dropdown(
                label=i18n("Voice Model"),
                choices=sorted(names),
                interactive=True,
                value=default_weight,
                allow_custom_value=True,
            )
            # Pre-select the index that goes with the default model.
            best_default_index_path = match_index(model_file.value)
            index_file = gr.Dropdown(
                label=i18n("Index File"),
                choices=get_indexes(),
                value=best_default_index_path,
                interactive=True,
                allow_custom_value=True,
            )
        with gr.Column():
            refresh_button = gr.Button(i18n("Refresh"))
            unload_button = gr.Button(i18n("Unload Voice"))

            # Unloading simply blanks the model dropdown.
            unload_button.click(
                fn=lambda: ({"value": "", "__type__": "update"}),
                inputs=[],
                outputs=[model_file],
            )

            # Picking a model selects its best matching index file.
            model_file.select(
                fn=match_index,
                inputs=[model_file],
                outputs=[index_file],
            )

    # Single-file inference.
    with gr.Tab(i18n("Single")):
        with gr.Row():
            with gr.Column():
                upload_audio = gr.Audio(
                    label=i18n("Upload Audio"), type="filepath", editable=False
                )
                with gr.Row():
                    audio = gr.Dropdown(
                        label=i18n("Select Audio"),
                        choices=sorted(audio_paths),
                        value=audio_paths[0] if audio_paths else "",
                        interactive=True,
                        allow_custom_value=True,
                    )

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Column():
                # Named per tab so that each button keeps its own handler.
                clear_outputs_infer = gr.Button(
                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
                )
                output_path = gr.Textbox(
                    label=i18n("Output Path"),
                    placeholder=i18n("Enter output path"),
                    value=output_path_fn(audio_paths[0])
                    if audio_paths
                    else os.path.join(now_dir, "assets", "audios", "output.wav"),
                    interactive=True,
                )
                split_audio = gr.Checkbox(
                    label=i18n("Split Audio"),
                    visible=True,
                    value=False,
                    interactive=True,
                )
                pitch = gr.Slider(-12, 12, 0, label=i18n("Pitch"))
                filter_radius = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label=i18n(
                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                    ),
                    value=3,
                    step=1,
                    interactive=True,
                )
                index_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Search Feature Ratio"),
                    value=0.75,
                    interactive=True,
                )
                hop_length = gr.Slider(
                    minimum=1,
                    maximum=512,
                    step=1,
                    label=i18n("Hop Length"),
                    value=128,
                    interactive=True,
                )
            with gr.Column():
                f0method = gr.Radio(
                    label=i18n("Pitch extraction algorithm"),
                    choices=[
                        "pm",
                        "harvest",
                        "dio",
                        "crepe",
                        "crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    interactive=True,
                )

        convert_button1 = gr.Button(i18n("Convert"))

        with gr.Row():
            vc_output1 = gr.Textbox(label=i18n("Output Information"))
            vc_output2 = gr.Audio(label=i18n("Export Audio"))

    # Batch (whole folder) inference.
    with gr.Tab(i18n("Batch")):
        with gr.Row():
            with gr.Column():
                input_folder_batch = gr.Textbox(
                    label=i18n("Input Folder"),
                    placeholder=i18n("Enter input path"),
                    value=os.path.join(now_dir, "assets", "audios"),
                    interactive=True,
                )
                output_folder_batch = gr.Textbox(
                    label=i18n("Output Folder"),
                    placeholder=i18n("Enter output path"),
                    value=os.path.join(now_dir, "assets", "audios"),
                    interactive=True,
                )
        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Column():
                clear_outputs_batch = gr.Button(
                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
                )
                pitch_batch = gr.Slider(-12, 12, 0, label=i18n("Pitch"))
                filter_radius_batch = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label=i18n(
                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                    ),
                    value=3,
                    step=1,
                    interactive=True,
                )
                index_rate_batch = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Search Feature Ratio"),
                    value=0.75,
                    interactive=True,
                )
                hop_length_batch = gr.Slider(
                    minimum=1,
                    maximum=512,
                    step=1,
                    label=i18n("Hop Length"),
                    value=128,
                    interactive=True,
                )
            with gr.Column():
                f0method_batch = gr.Radio(
                    label=i18n("Pitch extraction algorithm"),
                    choices=[
                        "pm",
                        "harvest",
                        "dio",
                        "crepe",
                        "crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    interactive=True,
                )

        convert_button2 = gr.Button(i18n("Convert"))

        with gr.Row():
            vc_output3 = gr.Textbox(label=i18n("Output Information"))

    refresh_button.click(
        fn=change_choices,
        inputs=[],
        outputs=[model_file, index_file, audio],
    )
    # Keep the suggested output path in sync with the selected audio.
    audio.change(
        fn=output_path_fn,
        inputs=[audio],
        outputs=[output_path],
    )
    upload_audio.upload(
        fn=save_to_wav2,
        inputs=[upload_audio],
        outputs=[audio, output_path],
    )
    upload_audio.stop_recording(
        fn=save_to_wav,
        inputs=[upload_audio],
        outputs=[audio, output_path],
    )
    # Wire both "Clear Outputs" buttons; sharing one variable name used to
    # leave the Single-tab button without any handler.
    for clear_button in (clear_outputs_infer, clear_outputs_batch):
        clear_button.click(
            fn=delete_outputs,
            inputs=[],
            outputs=[],
        )
    # The order of ``inputs`` is the order in which the values are handed to
    # the script functions imported from ``core``.
    convert_button1.click(
        fn=run_infer_script,
        inputs=[
            pitch,
            filter_radius,
            index_rate,
            hop_length,
            f0method,
            audio,
            output_path,
            model_file,
            index_file,
            split_audio,
        ],
        outputs=[vc_output1, vc_output2],
    )
    convert_button2.click(
        fn=run_batch_infer_script,
        inputs=[
            pitch_batch,
            filter_radius_batch,
            index_rate_batch,
            hop_length_batch,
            f0method_batch,
            input_folder_batch,
            output_folder_batch,
            model_file,
            index_file,
        ],
        outputs=[vc_output3],
    )
|
|