t / tabs /inference /inference.py
leey00nsu's picture
t
f11c739
raw
history blame
14 kB
import os, sys
import gradio as gr
import regex as re
import shutil
import datetime
import random
from core import (
run_infer_script,
run_batch_infer_script,
)
from assets.i18n.i18n import I18nAuto
# Translation helper: user-facing labels in this file are passed through i18n("...").
i18n = I18nAuto()
# The current working directory is treated as the project root.
now_dir = os.getcwd()
# Make the working directory importable. This runs after the
# `from core import ...` statement above, so it does not influence that import.
sys.path.append(now_dir)
# Folder walked recursively for voice models (.pth / .onnx) and .index files.
model_root = os.path.join(now_dir, "logs")
# Folder listed for selectable input audios; delete_outputs() removes the
# "_output" files found under it.
audio_root = os.path.join(now_dir, "assets", "audios")
# File-name suffixes accepted as audio. They are compared with str.endswith,
# so they carry no leading dot and matching is case-sensitive.
sup_audioext = {
    "aac", "ac3", "aiff", "alac", "flac",
    "m4a", "mp3", "mp4", "ogg", "opus",
    "wav", "webm", "wma",
}
# Snapshot of the files available at import time. change_choices() computes
# fresh lists for the dropdowns but does not rebind these module-level names.

# Voice model weights found anywhere below model_root.
names = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(model_root, topdown=False)
    for entry in entries
    if entry.endswith((".pth", ".onnx"))
]

# Index files below model_root, skipping any whose name contains "trained".
indexes_list = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(model_root, topdown=False)
    for entry in entries
    if "trained" not in entry and entry.endswith(".index")
]

# Input audios located directly in audio_root (sub-folders are ignored),
# leaving out previously generated "_output" files.
audio_paths = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(audio_root, topdown=False)
    for entry in entries
    if folder == audio_root
    and "_output" not in entry
    and entry.endswith(tuple(sup_audioext))
]
def output_path_fn(input_audio_path):
    """Return the default output path for an input audio file.

    The result lives in the same directory as the input and is named
    ``<name without its last extension>_output.wav``.
    """
    folder, file_name = os.path.split(input_audio_path)
    # Only the last dot-separated suffix is dropped ("a.b.flac" -> "a.b").
    stem = file_name.rsplit(".", 1)[0]
    return os.path.join(folder, f"{stem}_output.wav")
def change_choices():
    """Rescan the model and audio folders and return dropdown updates.

    Returns:
        A 3-tuple of update dicts (``{"choices": [...], "__type__": "update"}``)
        holding, in order, the sorted model files, the sorted index files and
        the sorted input audios.
    """
    model_files = []
    index_files = []
    # One pass over the model tree feeds both lists; they are sorted below,
    # so the traversal order does not matter.
    for folder, _, entries in os.walk(model_root, topdown=False):
        for entry in entries:
            full_path = os.path.join(folder, entry)
            if entry.endswith((".pth", ".onnx")):
                model_files.append(full_path)
            if entry.endswith(".index") and "trained" not in entry:
                index_files.append(full_path)

    audio_suffixes = tuple(sup_audioext)
    audio_files = []
    for folder, _, entries in os.walk(audio_root, topdown=False):
        # Only files sitting directly in audio_root are offered.
        if folder != audio_root:
            continue
        audio_files.extend(
            os.path.join(folder, entry)
            for entry in entries
            if "_output" not in entry and entry.endswith(audio_suffixes)
        )

    model_update = {"choices": sorted(model_files), "__type__": "update"}
    index_update = {"choices": sorted(index_files), "__type__": "update"}
    audio_update = {"choices": sorted(audio_files), "__type__": "update"}
    return model_update, index_update, audio_update
def get_indexes():
    """Collect the index files below ``model_root``.

    Returns:
        A list of paths to ``.index`` files whose name does not contain
        "trained", or an empty string when there are none.
    """
    found = []
    for dirpath, _, filenames in os.walk(model_root):
        for filename in filenames:
            if "trained" in filename or not filename.endswith(".index"):
                continue
            found.append(os.path.join(dirpath, filename))
    # An empty result is reported as "" rather than [].
    return found or ""
def match_index(model_file: str) -> str:
    """Pick the index file that best matches a voice model.

    The model's own folder (``model_root/<base name>``) is searched first,
    then ``model_root`` itself. Inside the model's folder every index file is
    a candidate; in ``model_root`` the index file name must contain the model
    name (case-insensitive). Files whose name contains "trained" are ignored.

    Args:
        model_file: Path or file name of the selected ``.pth`` / ``.onnx``
            model. May be empty.

    Returns:
        The path of the best candidate (names without spaces are preferred,
        then the largest file), or ``""`` when nothing matches.
    """
    if not model_file:
        return ""

    # Strip the extension only at the very end of the path. The previous
    # pattern (r"\.pth|\.onnx$") also removed ".pth" from the middle of names.
    model_stem = re.sub(r"\.(pth|onnx)$", "", model_file)
    model_file_name = os.path.basename(model_stem)
    if not model_file_name:
        # An empty name would be a substring of every index file name.
        return ""

    # Checkpoints named <model>_e<epoch>_s<step> belong to folder <model>.
    if re.match(r".+_e\d+_s\d+$", model_file_name):
        base_model_name = model_file_name.rsplit("_", 2)[0]
    else:
        base_model_name = model_file_name

    sid_directory = os.path.join(model_root, base_model_name)
    # Skip folders that do not exist so os.listdir cannot raise.
    directories_to_search = [
        directory
        for directory in (sid_directory, model_root)
        if os.path.isdir(directory)
    ]

    wanted_names = {model_file_name.lower(), base_model_name.lower()}
    candidates = []
    for directory in directories_to_search:
        # Anything inside the model's own folder counts as a match.
        folder_match = directory == sid_directory
        for filename in os.listdir(directory):
            if not filename.endswith(".index") or "trained" in filename:
                continue
            lowered = filename.lower()
            if not (folder_match or any(name in lowered for name in wanted_names)):
                continue
            index_path = os.path.join(directory, filename)
            # The disk is checked directly instead of the import-time
            # `indexes_list`, which misses files added after start-up.
            if os.path.isfile(index_path):
                candidates.append(
                    (index_path, os.path.getsize(index_path), " " not in filename)
                )

    if not candidates:
        return ""

    # Prefer names without spaces, then the largest file. max() keeps the
    # first of equally ranked candidates, like the stable sort it replaces.
    best_match = max(candidates, key=lambda item: (item[2], item[1]))
    return best_match[0]
def save_to_wav(record_button):
    """Move a finished recording into ``audio_root`` under a timestamped name.

    Args:
        record_button: Path of the recorded file, or ``None`` when there is
            no recording.

    Returns:
        ``(target_path, default_output_path)`` for the audio dropdown and the
        output-path textbox. When ``record_button`` is ``None``, two empty
        update dicts are returned so both components keep their values.
    """
    if record_button is None:
        # Same update-dict convention as the other callbacks in this file;
        # carrying no keys besides "__type__", it changes nothing.
        return {"__type__": "update"}, {"__type__": "update"}

    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
    target_path = os.path.join(audio_root, new_name)
    # The audio folder may not exist yet on a fresh checkout.
    os.makedirs(audio_root, exist_ok=True)
    shutil.move(record_button, target_path)
    return target_path, output_path_fn(target_path)
def save_to_wav2(upload_audio):
    """Copy an uploaded audio file into ``audio_root``, replacing any same-named file.

    Args:
        upload_audio: Path of the uploaded file; may be ``None`` or empty.

    Returns:
        ``(target_path, default_output_path)`` for the audio dropdown and the
        output-path textbox. When there is no upload, two empty update dicts
        are returned so both components keep their values.
    """
    if not upload_audio:
        return {"__type__": "update"}, {"__type__": "update"}

    file_path = upload_audio
    target_path = os.path.join(audio_root, os.path.basename(file_path))
    os.makedirs(audio_root, exist_ok=True)

    if os.path.exists(target_path):
        # Check before deleting: if the upload already is the target file,
        # removing it first would destroy the only copy. samefile() also
        # raises for a missing source before anything is removed.
        if os.path.samefile(file_path, target_path):
            return target_path, output_path_fn(target_path)
        os.remove(target_path)

    shutil.copy(file_path, target_path)
    return target_path, output_path_fn(target_path)
def delete_outputs():
    """Delete generated audio files below ``audio_root`` and notify the user.

    A file is removed when its name contains "_output" and ends with one of
    the suffixes in ``sup_audioext``. Sub-folders are included.
    """
    audio_suffixes = tuple(sup_audioext)
    for folder, _, entries in os.walk(audio_root, topdown=False):
        generated = [
            entry
            for entry in entries
            if "_output" in entry and entry.endswith(audio_suffixes)
        ]
        for entry in generated:
            os.remove(os.path.join(folder, entry))
    gr.Info("Outputs cleared!")
# Inference tab
def inference_tab():
    """Build the inference UI (model selection, "Single" and "Batch" tabs) and wire its events.

    The model and index dropdowns are shared by both tabs. "Single" converts
    one audio file through ``run_infer_script``; "Batch" converts a folder
    through ``run_batch_infer_script``. Each tab has its own "Clear Outputs"
    button and both are connected to ``delete_outputs``.
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a copy of this file that had lost its indentation;
    # confirm it against the rendered UI.
    default_weight = random.choice(names) if names else ""
    with gr.Row():
        with gr.Row():
            model_file = gr.Dropdown(
                label=i18n("Voice Model"),
                choices=sorted(names),
                interactive=True,
                value=default_weight,
                allow_custom_value=True,
            )
            # Pre-select the index file that goes with the default model.
            best_default_index_path = match_index(model_file.value)
            index_file = gr.Dropdown(
                label=i18n("Index File"),
                choices=get_indexes(),
                value=best_default_index_path,
                interactive=True,
                allow_custom_value=True,
            )
        with gr.Column():
            refresh_button = gr.Button(i18n("Refresh"))
            unload_button = gr.Button(i18n("Unload Voice"))

            # "Unload Voice" only clears the model dropdown value.
            unload_button.click(
                fn=lambda: ({"value": "", "__type__": "update"}),
                inputs=[],
                outputs=[model_file],
            )

            # Selecting a model picks its matching index file.
            model_file.select(
                fn=match_index,
                inputs=[model_file],
                outputs=[index_file],
            )

    # Single inference tab
    with gr.Tab(i18n("Single")):
        with gr.Row():
            with gr.Column():
                upload_audio = gr.Audio(
                    label=i18n("Upload Audio"), type="filepath", editable=False
                )
                with gr.Row():
                    audio = gr.Dropdown(
                        label=i18n("Select Audio"),
                        choices=sorted(audio_paths),
                        value=audio_paths[0] if audio_paths else "",
                        interactive=True,
                        allow_custom_value=True,
                    )

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Column():
                clear_outputs = gr.Button(
                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
                )
                output_path = gr.Textbox(
                    label=i18n("Output Path"),
                    placeholder=i18n("Enter output path"),
                    value=(
                        output_path_fn(audio_paths[0])
                        if audio_paths
                        else os.path.join(now_dir, "assets", "audios", "output.wav")
                    ),
                    interactive=True,
                )
                split_audio = gr.Checkbox(
                    label=i18n("Split Audio"),
                    visible=True,
                    value=False,
                    interactive=True,
                )
                pitch = gr.Slider(-12, 12, 0, label=i18n("Pitch"))
                filter_radius = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label=i18n(
                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                    ),
                    value=3,
                    step=1,
                    interactive=True,
                )
                index_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Search Feature Ratio"),
                    value=0.75,
                    interactive=True,
                )
                hop_length = gr.Slider(
                    minimum=1,
                    maximum=512,
                    step=1,
                    label=i18n("Hop Length"),
                    value=128,
                    interactive=True,
                )
            with gr.Column():
                f0method = gr.Radio(
                    label=i18n("Pitch extraction algorithm"),
                    choices=[
                        "pm",
                        "harvest",
                        "dio",
                        "crepe",
                        "crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    interactive=True,
                )

        convert_button1 = gr.Button(i18n("Convert"))

        with gr.Row():  # Defines output info + output audio download after conversion
            vc_output1 = gr.Textbox(label=i18n("Output Information"))
            vc_output2 = gr.Audio(label=i18n("Export Audio"))

    # Batch inference tab
    with gr.Tab(i18n("Batch")):
        with gr.Row():
            with gr.Column():
                input_folder_batch = gr.Textbox(
                    label=i18n("Input Folder"),
                    placeholder=i18n("Enter input path"),
                    value=os.path.join(now_dir, "assets", "audios"),
                    interactive=True,
                )
                output_folder_batch = gr.Textbox(
                    label=i18n("Output Folder"),
                    placeholder=i18n("Enter output path"),
                    value=os.path.join(now_dir, "assets", "audios"),
                    interactive=True,
                )

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Column():
                # Kept under its own name: reusing `clear_outputs` here used
                # to shadow the Single-tab button, which then never got a
                # click handler.
                clear_outputs_batch = gr.Button(
                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
                )
                pitch_batch = gr.Slider(-12, 12, 0, label=i18n("Pitch"))
                filter_radius_batch = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label=i18n(
                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                    ),
                    value=3,
                    step=1,
                    interactive=True,
                )
                index_rate_batch = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Search Feature Ratio"),
                    value=0.75,
                    interactive=True,
                )
                hop_length_batch = gr.Slider(
                    minimum=1,
                    maximum=512,
                    step=1,
                    label=i18n("Hop Length"),
                    value=128,
                    interactive=True,
                )
            with gr.Column():
                f0method_batch = gr.Radio(
                    label=i18n("Pitch extraction algorithm"),
                    choices=[
                        "pm",
                        "harvest",
                        "dio",
                        "crepe",
                        "crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    interactive=True,
                )

        convert_button2 = gr.Button(i18n("Convert"))

        with gr.Row():  # Defines output info + output audio download after conversion
            vc_output3 = gr.Textbox(label=i18n("Output Information"))

    # Not connected to any event in this function.
    def toggle_visible(checkbox):
        return {"visible": checkbox, "__type__": "update"}

    # "Refresh" rescans the folders and updates the three dropdowns.
    refresh_button.click(
        fn=change_choices,
        inputs=[],
        outputs=[model_file, index_file, audio],
    )
    # Keep the suggested output path in step with the selected input audio.
    audio.change(
        fn=output_path_fn,
        inputs=[audio],
        outputs=[output_path],
    )
    upload_audio.upload(
        fn=save_to_wav2,
        inputs=[upload_audio],
        outputs=[audio, output_path],
    )
    upload_audio.stop_recording(
        fn=save_to_wav,
        inputs=[upload_audio],
        outputs=[audio, output_path],
    )
    # Both tabs expose a "Clear Outputs" button; wire each of them.
    clear_outputs.click(
        fn=delete_outputs,
        inputs=[],
        outputs=[],
    )
    clear_outputs_batch.click(
        fn=delete_outputs,
        inputs=[],
        outputs=[],
    )
    convert_button1.click(
        fn=run_infer_script,
        inputs=[
            pitch,
            filter_radius,
            index_rate,
            hop_length,
            f0method,
            audio,
            output_path,
            model_file,
            index_file,
            split_audio,
        ],
        outputs=[vc_output1, vc_output2],
    )
    convert_button2.click(
        fn=run_batch_infer_script,
        inputs=[
            pitch_batch,
            filter_radius_batch,
            index_rate_batch,
            hop_length_batch,
            f0method_batch,
            input_folder_batch,
            output_folder_batch,
            model_file,
            index_file,
        ],
        outputs=[vc_output3],
    )