Spaces:
Running
Running
MarcusSu1216
committed on
Commit
·
6e41012
1
Parent(s):
fc2aea6
Update app.py
Browse files
app.py
CHANGED
@@ -1,446 +1,67 @@
|
|
1 |
import io
|
2 |
import os
|
3 |
-
|
4 |
-
|
5 |
-
#import argparse
|
6 |
import gradio as gr
|
7 |
-
import gradio.processing_utils as gr_pu
|
8 |
import librosa
|
9 |
import numpy as np
|
10 |
-
import soundfile
|
11 |
from inference.infer_tool import Svc
|
12 |
import logging
|
13 |
-
import json
|
14 |
-
import matplotlib.pyplot as plt
|
15 |
-
import parselmouth
|
16 |
-
import time
|
17 |
-
import subprocess
|
18 |
-
import shutil
|
19 |
-
import asyncio
|
20 |
-
import datetime
|
21 |
-
|
22 |
-
from scipy.io import wavfile
|
23 |
-
|
24 |
-
#parser = argparse.ArgumentParser()
|
25 |
-
#parser.add_argument("--user", type=str, help='set gradio user', default=None)
|
26 |
-
#parser.add_argument("--password", type=str, help='set gradio password', default=None)
|
27 |
-
#cmd_opts = parser.parse_args()
|
28 |
|
29 |
logging.getLogger('numba').setLevel(logging.WARNING)
|
30 |
logging.getLogger('markdown_it').setLevel(logging.WARNING)
|
31 |
logging.getLogger('urllib3').setLevel(logging.WARNING)
|
32 |
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
def
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
if
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice)
|
62 |
-
|
63 |
-
def load_options():
|
64 |
-
file_list = os.listdir("logs/44k")
|
65 |
-
ckpt_list = []
|
66 |
-
cluster_list = []
|
67 |
-
for ck in file_list:
|
68 |
-
if os.path.splitext(ck)[-1] == ".pth" and ck[0] != "k" and ck[:2] != "D_":
|
69 |
-
ckpt_list.append(ck)
|
70 |
-
if ck[0] == "k":
|
71 |
-
cluster_list.append(ck)
|
72 |
-
if not cluster_list:
|
73 |
-
cluster_list = ["你没有聚类模型"]
|
74 |
-
return choice_ckpt.update(choices = ckpt_list), config_choice.update(choices = os.listdir("configs")), cluster_choice.update(choices = cluster_list)
|
75 |
-
|
76 |
-
def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,F0_mean_pooling,enhancer_adaptive_key):
|
77 |
-
global model
|
78 |
-
try:
|
79 |
-
if input_audio is None:
|
80 |
-
return "You need to upload an audio", None
|
81 |
-
if model is None:
|
82 |
-
return "You need to upload an model", None
|
83 |
-
sampling_rate, audio = input_audio
|
84 |
-
# print(audio.shape,sampling_rate)
|
85 |
-
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
86 |
-
if len(audio.shape) > 1:
|
87 |
-
audio = librosa.to_mono(audio.transpose(1, 0))
|
88 |
-
temp_path = "temp.wav"
|
89 |
-
sf.write(temp_path, audio, sampling_rate, format="wav")
|
90 |
-
_audio = model.slice_inference(temp_path, sid, vc_transform, slice_db, cluster_ratio, auto_f0, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,F0_mean_pooling,enhancer_adaptive_key)
|
91 |
-
model.clear_empty()
|
92 |
-
os.remove(temp_path)
|
93 |
-
#构建保存文件的路径,并保存到results文件夹内
|
94 |
-
timestamp = str(int(time.time()))
|
95 |
-
output_file = os.path.join("results", sid + "_" + timestamp + ".wav")
|
96 |
-
sf.write(output_file, _audio, model.target_sample, format="wav")
|
97 |
-
return "Success", (model.target_sample, _audio)
|
98 |
-
except Exception as e:
|
99 |
-
return "异常信息:"+str(e)+"\n请排障后重试",None
|
100 |
-
|
101 |
-
def load_raw_dirs():
|
102 |
-
#检查文件名
|
103 |
-
allowed_pattern = re.compile(r'^[a-zA-Z0-9_@#$%^&()_+\-=\s]*$')
|
104 |
-
for root, dirs, files in os.walk(raw_path):
|
105 |
-
if root != raw_path: # 只处理子文件夹内的文件
|
106 |
-
for file in files:
|
107 |
-
file_name, _ = os.path.splitext(file)
|
108 |
-
if not allowed_pattern.match(file_name):
|
109 |
-
return "数据集文件名只能包含数字、字母、下划线"
|
110 |
-
#检查有没有小可爱不用wav文件当数据集
|
111 |
-
for root, dirs, files in os.walk(raw_path):
|
112 |
-
if root != raw_path: # 只处理子文件夹内的文件
|
113 |
-
for file in files:
|
114 |
-
if not file.endswith('.wav'):
|
115 |
-
return "数据集中包含非wav格式文件,请检查后再试"
|
116 |
-
spk_dirs = []
|
117 |
-
with os.scandir(raw_path) as entries:
|
118 |
-
for entry in entries:
|
119 |
-
if entry.is_dir():
|
120 |
-
spk_dirs.append(entry.name)
|
121 |
-
if len(spk_dirs) != 0:
|
122 |
-
return raw_dirs_list.update(value=spk_dirs)
|
123 |
-
else:
|
124 |
-
return raw_dirs_list.update(value="未找到数据集,请检查dataset_raw文件夹")
|
125 |
-
'''Old function
|
126 |
-
def dataset_preprocess():
|
127 |
-
preprocess_commands = [
|
128 |
-
r".\workenv\python.exe resample.py",
|
129 |
-
r".\workenv\python.exe preprocess_flist_config.py",
|
130 |
-
r".\workenv\python.exe preprocess_hubert_f0.py"
|
131 |
-
]
|
132 |
-
output = ""
|
133 |
-
for command in preprocess_commands:
|
134 |
-
try:
|
135 |
-
result = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT, text=True)
|
136 |
-
except subprocess.CalledProcessError as e:
|
137 |
-
result = e.output
|
138 |
-
output += f"Command: {command}\nResult:\n{result}\n{'-' * 50}\n"
|
139 |
-
#cmd = r".\venv\Scripts\activate&&python resample.py&&python preprocess_flist_config.py&&python preprocess_hubert_f0.py"
|
140 |
-
#print(cmd)
|
141 |
-
#p = Popen(cmd, shell=True, cwd=now_dir)
|
142 |
-
#p.wait()
|
143 |
-
config_path = "configs/config.json"
|
144 |
-
with open(config_path, 'r') as f:
|
145 |
-
config = json.load(f)
|
146 |
-
spk_dict = config["spk"]
|
147 |
-
spk_name = config.get('spk', None)
|
148 |
-
return output, speakers.update(value=spk_name)
|
149 |
-
'''
|
150 |
-
def dataset_preprocess():
|
151 |
-
preprocess_commands = [
|
152 |
-
r".\workenv\python.exe resample.py",
|
153 |
-
r".\workenv\python.exe preprocess_flist_config.py",
|
154 |
-
r".\workenv\python.exe preprocess_hubert_f0.py"
|
155 |
-
]
|
156 |
-
accumulated_output = ""
|
157 |
-
|
158 |
-
for command in preprocess_commands:
|
159 |
-
try:
|
160 |
-
result = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, text=True)
|
161 |
-
|
162 |
-
accumulated_output += f"Command: {command}\n"
|
163 |
-
yield accumulated_output, None
|
164 |
-
|
165 |
-
for line in result.stdout:
|
166 |
-
accumulated_output += line
|
167 |
-
yield accumulated_output, None
|
168 |
-
|
169 |
-
result.communicate()
|
170 |
-
|
171 |
-
except subprocess.CalledProcessError as e:
|
172 |
-
result = e.output
|
173 |
-
accumulated_output += f"Error: {result}\n"
|
174 |
-
yield accumulated_output, None
|
175 |
-
|
176 |
-
accumulated_output += '-' * 50 + '\n'
|
177 |
-
yield accumulated_output, None
|
178 |
-
|
179 |
-
config_path = "configs/config.json"
|
180 |
-
with open(config_path, 'r') as f:
|
181 |
-
config = json.load(f)
|
182 |
-
spk_dict = config["spk"]
|
183 |
-
spk_name = config.get('spk', None)
|
184 |
-
|
185 |
-
yield accumulated_output, gr.Textbox.update(value=spk_name)
|
186 |
-
|
187 |
-
def clear_output():
|
188 |
-
return gr.Textbox.update(value="Cleared!>_<")
|
189 |
-
|
190 |
-
def config_fn(log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem):
|
191 |
-
config_origin = ".\\configs\\config.json"
|
192 |
-
with open(config_origin, 'r') as config_file:
|
193 |
-
config_data = json.load(config_file)
|
194 |
-
config_data['train']['log_interval'] = int(log_interval)
|
195 |
-
config_data['train']['eval_interval'] = int(eval_interval)
|
196 |
-
config_data['train']['keep_ckpts'] = int(keep_ckpts)
|
197 |
-
config_data['train']['batch_size'] = int(batch_size)
|
198 |
-
config_data['train']['learning_rate'] = float(lr)
|
199 |
-
config_data['train']['fp16_run'] = fp16_run
|
200 |
-
config_data['train']['all_in_mem'] = all_in_mem
|
201 |
-
with open(config_origin, 'w') as config_file:
|
202 |
-
json.dump(config_data, config_file, indent=4)
|
203 |
-
return "配置文件写入完成"
|
204 |
-
|
205 |
-
#def next_backup_folder_number(backup_path):
|
206 |
-
# numbers = [int(folder) for folder in os.listdir(backup_path) if folder.isdigit()]
|
207 |
-
# return max(numbers) + 1 if numbers else 1
|
208 |
-
|
209 |
-
def training(gpu_selection):
|
210 |
-
if not os.listdir(r"dataset\44k"):
|
211 |
-
return "数据集不存在,请检查dataset文件夹"
|
212 |
-
dataset_path = "dataset/44k"
|
213 |
-
no_npy_pt_files = True
|
214 |
-
for root, dirs, files in os.walk(dataset_path):
|
215 |
-
for file in files:
|
216 |
-
if file.endswith('.npy') or file.endswith('.pt'):
|
217 |
-
no_npy_pt_files = False
|
218 |
-
break
|
219 |
-
if no_npy_pt_files:
|
220 |
-
return "数据集中未检测到f0和hubert文件,可能是预训练未完成"
|
221 |
-
#备份logs/44k文件
|
222 |
-
logs_44k = "logs/44k"
|
223 |
-
pre_trained_model = "pre_trained_model"
|
224 |
-
models_backup = "models_backup"
|
225 |
-
timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')
|
226 |
-
#new_backup_folder_number = next_backup_folder_number(models_backup)
|
227 |
-
new_backup_folder = os.path.join(models_backup, str(timestamp))
|
228 |
-
os.makedirs(new_backup_folder, exist_ok=True)
|
229 |
-
for file in os.listdir(logs_44k):
|
230 |
-
shutil.move(os.path.join(logs_44k, file), os.path.join(new_backup_folder, file))
|
231 |
-
d_0_path = os.path.join(pre_trained_model, "D_0.pth")
|
232 |
-
g_0_path = os.path.join(pre_trained_model, "G_0.pth")
|
233 |
-
if os.path.isfile(d_0_path) and os.path.isfile(g_0_path):
|
234 |
-
print("D_0.pth and G_0.pth exist in pre_trained_model")
|
235 |
-
else:
|
236 |
-
print("D_0.pth and/or G_0.pth are missing in pre_trained_model")
|
237 |
-
shutil.copy(d_0_path, os.path.join(logs_44k, "D_0.pth"))
|
238 |
-
shutil.copy(g_0_path, os.path.join(logs_44k, "G_0.pth"))
|
239 |
-
cmd = r"set CUDA_VISIBLE_DEVICES=%s && .\workenv\python.exe train.py -c configs/config.json -m 44k" % (gpu_selection)
|
240 |
-
subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", cmd])
|
241 |
-
return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"
|
242 |
|
243 |
-
def continue_training(gpu_selection):
|
244 |
-
if not os.listdir(r"dataset\44k"):
|
245 |
-
return "数据集不存在,请检查dataset文件夹"
|
246 |
-
dataset_path = "dataset/44k"
|
247 |
-
no_npy_pt_files = True
|
248 |
-
for root, dirs, files in os.walk(dataset_path):
|
249 |
-
for file in files:
|
250 |
-
if file.endswith('.npy') or file.endswith('.pt'):
|
251 |
-
no_npy_pt_files = False
|
252 |
-
break
|
253 |
-
if no_npy_pt_files:
|
254 |
-
return "数据集中未检测到f0和hubert文件,可能是预训练未完成"
|
255 |
-
cmd = r"set CUDA_VISIBLE_DEVICES=%s && .\workenv\python.exe train.py -c configs/config.json -m 44k" % (gpu_selection)
|
256 |
-
subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", cmd])
|
257 |
-
return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"
|
258 |
-
|
259 |
-
def continue_selected_training(work_dir):
|
260 |
-
print(work_dir)
|
261 |
-
if work_dir is None:
|
262 |
-
return "你没有选择工作进度"
|
263 |
-
if not os.path.exists(os.path.join(models_backup_path, work_dir)):
|
264 |
-
return "该工作文件夹不存在",
|
265 |
-
logs_44k_path = r'logs\44k'
|
266 |
-
logs_44k_files = os.listdir(logs_44k_path)
|
267 |
-
d0_path = os.path.join(logs_44k_path, "D_0.pth")
|
268 |
-
g0_path = os.path.join(logs_44k_path, "G_0.pth")
|
269 |
-
if len(logs_44k_files) == 2 and os.path.isfile(d0_path) and os.path.isfile(g0_path):
|
270 |
-
os.remove(d0_path)
|
271 |
-
os.remove(g0_path)
|
272 |
-
else:
|
273 |
-
if logs_44k_files:
|
274 |
-
timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')
|
275 |
-
new_backup_folder = os.path.join(models_backup_path, timestamp)
|
276 |
-
os.makedirs(new_backup_folder)
|
277 |
-
|
278 |
-
for file in logs_44k_files:
|
279 |
-
shutil.copy(os.path.join(logs_44k_path, file), new_backup_folder)
|
280 |
-
work_dir_path = os.path.join(models_backup_path, work_dir)
|
281 |
-
work_dir_files = os.listdir(work_dir_path)
|
282 |
-
for file in work_dir_files:
|
283 |
-
shutil.copy(os.path.join(work_dir_path, file), logs_44k_path)
|
284 |
-
|
285 |
-
return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"
|
286 |
-
|
287 |
-
def previous_selection_refresh():
|
288 |
-
work_saved_list = []
|
289 |
-
for entry in os.listdir("models_backup"):
|
290 |
-
entry_path = os.path.join(models_backup_path, entry)
|
291 |
-
if os.path.isdir(entry_path):
|
292 |
-
work_saved_list.append(entry)
|
293 |
-
return gr.Dropdown.update(choices=work_saved_list)
|
294 |
-
|
295 |
-
|
296 |
-
def kmeans_training():
|
297 |
-
if not os.listdir(r"dataset\44k"):
|
298 |
-
return "数据集不存在,请检查dataset文件夹"
|
299 |
-
subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", r".\workenv\python.exe cluster\train_cluster.py"])
|
300 |
-
return "已经在新的终端窗口开始训练,训练聚类模型不会输出日志,检查任务管理器中python进程有在占用CPU就是正在训练,训练一般需要5-10分钟左右"
|
301 |
-
|
302 |
-
# read ckpt list
|
303 |
-
file_list = os.listdir("logs/44k")
|
304 |
-
ckpt_list = []
|
305 |
-
cluster_list = []
|
306 |
-
for ck in file_list:
|
307 |
-
if os.path.splitext(ck)[-1] == ".pth" and ck[0] != "k" and ck[:2] != "D_":
|
308 |
-
ckpt_list.append(ck)
|
309 |
-
if ck[0] == "k":
|
310 |
-
cluster_list.append(ck)
|
311 |
-
if not cluster_list:
|
312 |
-
cluster_list = ["你没有聚类模型"]
|
313 |
-
|
314 |
-
#read GPU info
|
315 |
-
ngpu=torch.cuda.device_count()
|
316 |
-
gpu_infos=[]
|
317 |
-
if(torch.cuda.is_available()==False or ngpu==0):if_gpu_ok=False
|
318 |
-
else:
|
319 |
-
if_gpu_ok = False
|
320 |
-
for i in range(ngpu):
|
321 |
-
gpu_name=torch.cuda.get_device_name(i)
|
322 |
-
if("16"in gpu_name or "MX"in gpu_name):continue
|
323 |
-
if("10"in gpu_name or "20"in gpu_name or "30"in gpu_name or "40"in gpu_name or "A50"in gpu_name.upper() or "70"in gpu_name or "80"in gpu_name or "90"in gpu_name or "M4"in gpu_name or "T4"in gpu_name or "TITAN"in gpu_name.upper()):#A10#A100#V100#A40#P40#M40#K80
|
324 |
-
if_gpu_ok=True#至少有一张能用的N卡
|
325 |
-
gpu_infos.append("%s\t%s"%(i,gpu_name))
|
326 |
-
gpu_info="\n".join(gpu_infos)if if_gpu_ok==True and len(gpu_infos)>0 else "很遗憾您这没有能用的显卡来支持您训练"
|
327 |
-
gpus="-".join([i[0]for i in gpu_infos])
|
328 |
-
|
329 |
-
#get previous saved training work
|
330 |
-
work_saved_list = []
|
331 |
-
for entry in os.listdir("models_backup"):
|
332 |
-
entry_path = os.path.join(models_backup_path, entry)
|
333 |
-
if os.path.isdir(entry_path):
|
334 |
-
work_saved_list.append(entry)
|
335 |
|
336 |
app = gr.Blocks()
|
337 |
with app:
|
338 |
-
gr.Markdown(value="""
|
339 |
-
###sovits4.0 webui 推理&训练
|
340 |
-
|
341 |
-
修改自原项目及bilibili@麦哲云
|
342 |
-
|
343 |
-
仅供个人娱乐和非商业用途,禁止用于血腥、暴力、性相关、政治相关内容
|
344 |
-
|
345 |
-
作者:bilibili@羽毛布団
|
346 |
-
|
347 |
-
""")
|
348 |
with gr.Tabs():
|
349 |
-
with gr.TabItem("
|
350 |
-
choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
|
351 |
-
config_choice = gr.Dropdown(label="配置文件", choices=os.listdir("configs"), value="no_config")
|
352 |
-
cluster_choice = gr.Dropdown(label="选择聚类模型", choices=cluster_list, value="no_clu")
|
353 |
-
enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
|
354 |
-
refresh = gr.Button("刷新选项")
|
355 |
-
loadckpt = gr.Button("加载模型", variant="primary")
|
356 |
-
|
357 |
-
sid = gr.Dropdown(label="音色", value="speaker0")
|
358 |
-
model_message = gr.Textbox(label="Output Message")
|
359 |
-
|
360 |
-
refresh.click(load_options,[],[choice_ckpt,config_choice,cluster_choice])
|
361 |
-
loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance],[model_message, sid])
|
362 |
-
|
363 |
gr.Markdown(value="""
|
364 |
-
|
|
|
|
|
365 |
""")
|
366 |
-
|
367 |
-
|
|
|
368 |
vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
|
369 |
cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0)
|
370 |
auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False)
|
371 |
-
F0_mean_pooling = gr.Checkbox(label="F0均值滤波(池化),开启后可能有效改善哑音(对因和声混响造成的哑音无效)。", value=False)
|
372 |
-
enhancer_adaptive_key = gr.Number(label="使NSF-HIFIGAN增强器适应更高的音域(单位为半音数)|默认为0", value=0,interactive=True)
|
373 |
-
slice_db = gr.Number(label="切片阈值", value=-40)
|
374 |
noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4)
|
375 |
-
cl_num = gr.Number(label="音频自动切片,0为不切片,单位为秒/s", value=0)
|
376 |
-
pad_seconds = gr.Number(label="推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现", value=0.5)
|
377 |
-
lg_num = gr.Number(label="两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,注意,该设置会影响推理速度,单位为秒/s", value=0)
|
378 |
-
lgr_num = gr.Number(label="自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭", value=0.75,interactive=True)
|
379 |
-
|
380 |
vc_submit = gr.Button("转换", variant="primary")
|
381 |
vc_output1 = gr.Textbox(label="Output Message")
|
382 |
vc_output2 = gr.Audio(label="Output Audio")
|
|
|
383 |
|
384 |
-
|
385 |
-
|
386 |
-
with gr.TabItem("训练"):
|
387 |
-
gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
|
388 |
-
raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
|
389 |
-
get_raw_dirs=gr.Button("识别数据集", variant="primary")
|
390 |
-
gr.Markdown(value="""确认数据集正确识别后请点击数据预处理(大数据集可能会花上很长时间预处理,没报错等着就行)""")
|
391 |
-
#with gr.Row():
|
392 |
-
raw_preprocess=gr.Button("数据预处理", variant="primary")
|
393 |
-
preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
|
394 |
-
clear_preprocess_output=gr.Button("清空输出信息")
|
395 |
-
with gr.Group():
|
396 |
-
gr.Markdown(value="""填写训练设置和超参数""")
|
397 |
-
with gr.Row():
|
398 |
-
gr.Textbox(label="当前使用显卡信息", value=gpu_info)
|
399 |
-
gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
|
400 |
-
with gr.Row():
|
401 |
-
log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value="200")
|
402 |
-
eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value="800")
|
403 |
-
keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value="10")
|
404 |
-
with gr.Row():
|
405 |
-
batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch可以加快训练但显著增加显存占用。6G显存建议设定为4", value="12")
|
406 |
-
lr=gr.Textbox(label="学习率,尽量与batch size成正比(6:0.0001),无法整除的话四舍五入一下也行", value="0.0002")
|
407 |
-
fp16_run=gr.Checkbox(label="是否使用半精度训练,半精度训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=False)
|
408 |
-
all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用", value=False)
|
409 |
-
with gr.Row():
|
410 |
-
gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
|
411 |
-
speakers=gr.Textbox(label="说话人列表")
|
412 |
-
write_config=gr.Button("写入配置文件", variant="primary")
|
413 |
-
|
414 |
-
write_config_output=gr.Textbox(label="写入配置文件输出信息")
|
415 |
-
|
416 |
-
gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
|
417 |
-
**继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
|
418 |
-
""")
|
419 |
-
with gr.Row():
|
420 |
-
with gr.Column():
|
421 |
-
start_training=gr.Button("从头开始训练", variant="primary")
|
422 |
-
training_output=gr.Textbox(label="训练输出信息")
|
423 |
-
with gr.Column():
|
424 |
-
continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
|
425 |
-
continue_training_output=gr.Textbox(label="训练输出信息")
|
426 |
-
with gr.Column():
|
427 |
-
kmeans_button=gr.Button("训练聚类模型", variant="primary")
|
428 |
-
kmeans_output=gr.Textbox(label="训练输出信息")
|
429 |
-
#previous_selection_training_btn=gr.Button("继续训练已保存的工作", variant="primary")
|
430 |
-
#with gr.Row():
|
431 |
-
# select_previous_work=gr.Dropdown(label="选择已保存的工作进度", choices=work_saved_list)
|
432 |
-
# previous_selection_refresh_btn=gr.Button("刷新列表", variant="primary")
|
433 |
-
#previous_selection_output=gr.Textbox(label="训练输出信息")
|
434 |
-
|
435 |
-
|
436 |
-
get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
|
437 |
-
raw_preprocess.click(dataset_preprocess,[],[preprocess_output, speakers])
|
438 |
-
clear_preprocess_output.click(clear_output,[],[preprocess_output])
|
439 |
-
write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem],[write_config_output])
|
440 |
-
start_training.click(training,[gpu_selection],[training_output])
|
441 |
-
continue_training_btn.click(continue_training,[gpu_selection],[continue_training_output])
|
442 |
-
#previous_selection_training_btn.click(continue_selected_training,[select_previous_work],[previous_selection_output])
|
443 |
-
#previous_selection_refresh_btn.click(previous_selection_refresh,[],[select_previous_work])
|
444 |
-
kmeans_button.click(kmeans_training,[],[kmeans_output])
|
445 |
-
|
446 |
-
app.queue(concurrency_count=1022, max_size=2044).launch(server_name="127.0.0.1",inbrowser=True,quiet=True)
|
|
|
1 |
import io
|
2 |
import os
|
3 |
+
|
4 |
+
# Download the HuBERT checkpoint into hubert/ when the app starts.
# The Hub's /blob/ URL serves the HTML file-viewer page, so wget would store a
# web page under the .pt name; /resolve/ returns the raw file contents.
os.system("wget -P hubert/ https://huggingface.co/spaces/MarcusSu1216/XingTong/resolve/main/hubert/checkpoint_best_legacy_500.pt")
|
|
|
5 |
import gradio as gr
|
|
|
6 |
import librosa
|
7 |
import numpy as np
|
8 |
+
import soundfile
|
9 |
from inference.infer_tool import Svc
|
10 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Raise the threshold of these third-party loggers to WARNING so their
# lower-level records are not emitted.
for _noisy_logger in ('numba', 'markdown_it', 'urllib3', 'matplotlib'):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
|
16 |
|
17 |
+
# Build the shared Svc instance once at import time; vc_fn and the UI code
# below read it as the module-level `model`.
# Positional arguments are a checkpoint path and a config path, plus a cluster
# model path by keyword (presumably generator weights and k-means model —
# confirm against the Svc signature in inference/infer_tool.py).
model = Svc("logs/44k/G_32000.pth", "configs/config.json", cluster_model_path="logs/44k/kmeans_10000.pt")
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, noise_scale):
    """Run voice conversion on an uploaded clip and return a status plus audio.

    Args:
        sid: Speaker identifier forwarded to ``model.infer``.
        input_audio: ``(sampling_rate, samples)`` pair, or ``None`` when
            nothing was uploaded. ``samples`` is a NumPy array whose first
            axis is time; a second axis, if present, is treated as channels.
        vc_transform: Pitch shift forwarded to ``model.infer``.
        auto_f0: Forwarded as ``auto_predict_f0``.
        cluster_ratio: Forwarded as ``cluster_infer_ratio``.
        noise_scale: Forwarded as ``noice_scale`` (the callee's spelling).

    Returns:
        ``(message, audio)`` where ``audio`` is ``(sample_rate, ndarray)`` on
        success and ``None`` when the request is rejected.
    """
    import tempfile

    if input_audio is None:
        return "You need to upload an audio", None

    sampling_rate, audio = input_audio

    # Reject long clips before doing any work on them.
    duration = audio.shape[0] / sampling_rate
    if duration > 45:
        return "请上传小于45s的音频,需要转换长音频请本地进行转换", None

    # Scale integer PCM to float32. np.iinfo only accepts integer dtypes, so
    # floating-point input is converted without rescaling.
    # NOTE(review): assumes float input is already in [-1, 1] — confirm.
    if np.issubdtype(audio.dtype, np.integer):
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        audio = audio.astype(np.float32)

    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)

    log = logging.getLogger(__name__)
    log.debug("resampled audio shape: %s", audio.shape)
    log.debug("cluster_ratio=%s auto_f0=%s noise_scale=%s",
              cluster_ratio, auto_f0, noise_scale)

    # A unique temp file per call: a fixed "temp.wav" would be overwritten by
    # a concurrent request, and it was never cleaned up.
    fd, out_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        soundfile.write(out_wav_path, audio, 16000, format="wav")
        # The second return value was never used by the original code either.
        out_audio, _ = model.infer(sid, vc_transform, out_wav_path,
                                   cluster_infer_ratio=cluster_ratio,
                                   auto_predict_f0=auto_f0,
                                   noice_scale=noise_scale
                                   )
    finally:
        os.remove(out_wav_path)

    # Prefer the rate the model reports; fall back to the previous constant.
    # NOTE(review): assumes Svc exposes `target_sample` — confirm.
    out_rate = getattr(model, "target_sample", 44100)
    return "Success", (out_rate, out_audio.numpy())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
# Build the Gradio UI: a single "Basic" tab holding the conversion controls,
# wired to vc_fn, then start the server.
app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            gr.Markdown(value="""
                SoVITS 4.0 在线 demo,基于 https://github.com/innnky/so-vits-svc/tree/4.0

                此 demo 为预训练底模在线 demo,使用数据:星瞳
                """)
            # Offer the speakers the loaded model actually knows instead of a
            # hard-coded list, keeping the previous default when it exists.
            spks = list(model.spk2id.keys())
            if "mqnl-covers-v3" in spks:
                default_spk = "mqnl-covers-v3"
            else:
                default_spk = spks[0] if spks else None
            sid = gr.Dropdown(label="音色", choices=spks, value=default_spk)
            vc_input3 = gr.Audio(label="上传音频(长度小于45秒)")
            vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
            cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0)
            auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False)
            noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4)
            vc_submit = gr.Button("转换", variant="primary")
            vc_output1 = gr.Textbox(label="Output Message")
            vc_output2 = gr.Audio(label="Output Audio")
            # Input order must match vc_fn's positional parameters.
            vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, noise_scale], [vc_output1, vc_output2])

app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|