MarcusSu1216 committed on
Commit
6e41012
·
1 Parent(s): fc2aea6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -418
app.py CHANGED
@@ -1,446 +1,67 @@
1
  import io
2
  import os
3
- import re
4
- import torch
5
- #import argparse
6
  import gradio as gr
7
- import gradio.processing_utils as gr_pu
8
  import librosa
9
  import numpy as np
10
- import soundfile as sf
11
  from inference.infer_tool import Svc
12
  import logging
13
- import json
14
- import matplotlib.pyplot as plt
15
- import parselmouth
16
- import time
17
- import subprocess
18
- import shutil
19
- import asyncio
20
- import datetime
21
-
22
- from scipy.io import wavfile
23
-
24
- #parser = argparse.ArgumentParser()
25
- #parser.add_argument("--user", type=str, help='set gradio user', default=None)
26
- #parser.add_argument("--password", type=str, help='set gradio password', default=None)
27
- #cmd_opts = parser.parse_args()
28
 
29
  logging.getLogger('numba').setLevel(logging.WARNING)
30
  logging.getLogger('markdown_it').setLevel(logging.WARNING)
31
  logging.getLogger('urllib3').setLevel(logging.WARNING)
32
  logging.getLogger('matplotlib').setLevel(logging.WARNING)
33
 
34
- raw_path = "./dataset_raw"
35
- models_backup_path = './models_backup'
36
- #now_dir = os.getcwd()
37
-
38
def load_model_func(ckpt_name, cluster_name, config_name, enhance):
    """Load the selected checkpoint into the module-level ``model``.

    Args:
        ckpt_name: File name of the checkpoint inside ``logs/44k``.
        cluster_name: File name of the cluster model inside ``logs/44k``, or
            one of the "no cluster model" placeholders shown by the UI.
        config_name: File name of the JSON config inside ``configs``.
        enhance: Forwarded to ``Svc`` as ``nsf_hifigan_enhance``.

    Returns:
        A ``(message, dropdown_update)`` pair: the status text and an update
        that fills the speaker dropdown with the speakers from the config.
    """
    global model

    config_path = "configs/" + config_name
    with open(config_path, 'r', encoding="utf-8") as f:
        config = json.load(f)

    # A config without a "spk" entry falls back to the placeholder speaker
    # instead of raising KeyError before the fallback can be reached.
    spk_dict = config.get("spk") or {}
    spk_list = list(spk_dict.keys())
    spk_choice = spk_list[0] if spk_list else "未检测到音色"

    ckpt_path = "logs/44k/" + ckpt_name
    # "no_clu" is the dropdown's initial value and "你没有聚类模型" is the
    # placeholder offered when no cluster file was found; neither names a
    # real file, so both mean "load without a cluster model".
    if cluster_name in (None, "", "no_clu", "你没有聚类模型"):
        model = Svc(ckpt_path, config_path, nsf_hifigan_enhance=enhance)
    else:
        cluster_path = "logs/44k/" + cluster_name
        model = Svc(ckpt_path, config_path, cluster_model_path=cluster_path, nsf_hifigan_enhance=enhance)

    output_msg = "模型加载成功"
    return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice)
62
-
63
def load_options():
    """Rescan ``logs/44k`` and ``configs`` and refresh the three dropdowns.

    Returns:
        Updates for the checkpoint, config and cluster-model dropdowns, in
        that order.
    """
    entries = os.listdir("logs/44k")
    # Selectable checkpoints: ``.pth`` files whose name neither starts with
    # "k" nor carries the "D_" prefix.
    checkpoints = [
        name for name in entries
        if os.path.splitext(name)[-1] == ".pth" and name[0] != "k" and name[:2] != "D_"
    ]
    # Anything starting with "k" is offered as a cluster model.
    clusters = [name for name in entries if name[0] == "k"]
    if not clusters:
        clusters = ["你没有聚类模型"]
    return (
        choice_ckpt.update(choices=checkpoints),
        config_choice.update(choices=os.listdir("configs")),
        cluster_choice.update(choices=clusters),
    )
75
-
76
def vc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, F0_mean_pooling, enhancer_adaptive_key):
    """Convert the uploaded audio with the currently loaded model.

    Args:
        sid: Speaker name; also used as the prefix of the saved result file.
        input_audio: ``(sampling_rate, samples)`` pair, or ``None`` when
            nothing was uploaded.
        vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale,
        pad_seconds, cl_num, lg_num, lgr_num, F0_mean_pooling,
        enhancer_adaptive_key: Forwarded unchanged to
            ``model.slice_inference``.

    Returns:
        ``(message, audio)`` where ``audio`` is ``(sample_rate, samples)`` on
        success and ``None`` otherwise. Exceptions are reported through the
        message instead of being raised.
    """
    try:
        if input_audio is None:
            return "You need to upload an audio", None
        # ``model`` only exists once a checkpoint has been loaded; look it up
        # defensively so the guard below reports that case instead of a
        # NameError surfacing through the generic handler.
        svc = globals().get("model")
        if svc is None:
            return "You need to upload an model", None
        sampling_rate, audio = input_audio
        if np.issubdtype(audio.dtype, np.integer):
            # Scale integer PCM by the dtype's maximum value.
            audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
        else:
            # np.iinfo rejects float dtypes.
            # NOTE(review): assumes float input is already normalised.
            audio = audio.astype(np.float32)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.transpose(1, 0))
        temp_path = "temp.wav"
        try:
            sf.write(temp_path, audio, sampling_rate, format="wav")
            _audio = svc.slice_inference(temp_path, sid, vc_transform, slice_db, cluster_ratio, auto_f0, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, F0_mean_pooling, enhancer_adaptive_key)
            svc.clear_empty()
        finally:
            # Remove the scratch file even when inference fails.
            if os.path.exists(temp_path):
                os.remove(temp_path)
        # Build the output path and keep a copy of the result under results/.
        os.makedirs("results", exist_ok=True)
        timestamp = str(int(time.time()))
        output_file = os.path.join("results", sid + "_" + timestamp + ".wav")
        sf.write(output_file, _audio, svc.target_sample, format="wav")
        return "Success", (svc.target_sample, _audio)
    except Exception as e:
        return "异常信息:"+str(e)+"\n请排障后重试",None
100
-
101
def load_raw_dirs():
    """Validate the raw dataset folder and report the speaker folders in it.

    Files located in sub-folders of ``raw_path`` are checked first for
    disallowed characters in the base name and then for a non-``.wav``
    suffix; the first failing check returns its message as plain text.
    Otherwise the names of the immediate sub-folders of ``raw_path`` are
    written to ``raw_dirs_list``.
    """
    name_rule = re.compile(r'^[a-zA-Z0-9_@#$%^&()_+\-=\s]*$')
    # Only files inside sub-folders are inspected; files directly in
    # raw_path are ignored.
    nested_files = [
        filename
        for folder, _subdirs, filenames in os.walk(raw_path)
        if folder != raw_path
        for filename in filenames
    ]
    # Check file names.
    if any(not name_rule.match(os.path.splitext(f)[0]) for f in nested_files):
        return "数据集文件名只能包含数字、字母、下划线"
    # Check that every dataset file is a wav file.
    if any(not f.endswith('.wav') for f in nested_files):
        return "数据集中包含非wav格式文件,请检查后再试"

    with os.scandir(raw_path) as entries:
        speakers_found = [entry.name for entry in entries if entry.is_dir()]
    if not speakers_found:
        return raw_dirs_list.update(value="未找到数据集,请检查dataset_raw文件夹")
    return raw_dirs_list.update(value=speakers_found)
125
- '''Old function
126
- def dataset_preprocess():
127
- preprocess_commands = [
128
- r".\workenv\python.exe resample.py",
129
- r".\workenv\python.exe preprocess_flist_config.py",
130
- r".\workenv\python.exe preprocess_hubert_f0.py"
131
- ]
132
- output = ""
133
- for command in preprocess_commands:
134
- try:
135
- result = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT, text=True)
136
- except subprocess.CalledProcessError as e:
137
- result = e.output
138
- output += f"Command: {command}\nResult:\n{result}\n{'-' * 50}\n"
139
- #cmd = r".\venv\Scripts\activate&&python resample.py&&python preprocess_flist_config.py&&python preprocess_hubert_f0.py"
140
- #print(cmd)
141
- #p = Popen(cmd, shell=True, cwd=now_dir)
142
- #p.wait()
143
- config_path = "configs/config.json"
144
- with open(config_path, 'r') as f:
145
- config = json.load(f)
146
- spk_dict = config["spk"]
147
- spk_name = config.get('spk', None)
148
- return output, speakers.update(value=spk_name)
149
- '''
150
def dataset_preprocess():
    """Run the three preprocessing commands and stream their console output.

    The commands are executed one after another through the shell. A command
    that fails does not stop the remaining ones; the failure is appended to
    the log instead.

    Yields:
        ``(log_text, speakers_update)`` pairs. ``log_text`` is the output
        accumulated so far. ``speakers_update`` is ``None`` until the final
        item, which carries a textbox update holding the ``"spk"`` entry of
        ``configs/config.json``. If that file cannot be read, the error is
        appended to the log and no final update is produced.
    """
    preprocess_commands = [
        r".\workenv\python.exe resample.py",
        r".\workenv\python.exe preprocess_flist_config.py",
        r".\workenv\python.exe preprocess_hubert_f0.py"
    ]
    accumulated_output = ""

    for command in preprocess_commands:
        accumulated_output += f"Command: {command}\n"
        yield accumulated_output, None

        try:
            # The context manager closes the pipe and waits for the process,
            # also when the consumer stops iterating early.
            with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, text=True) as proc:
                for line in proc.stdout:
                    accumulated_output += line
                    yield accumulated_output, None
            # Popen never raises CalledProcessError, so a failed command has
            # to be detected through its exit status.
            if proc.returncode:
                accumulated_output += f"Error: command exited with code {proc.returncode}\n"
                yield accumulated_output, None
        except OSError as e:
            accumulated_output += f"Error: {e}\n"
            yield accumulated_output, None

        accumulated_output += '-' * 50 + '\n'
        yield accumulated_output, None

    config_path = "configs/config.json"
    try:
        with open(config_path, 'r') as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # Report an unreadable config in the log rather than aborting the
        # stream with a traceback.
        accumulated_output += f"Error: cannot read {config_path}: {e}\n"
        yield accumulated_output, None
        return

    spk_name = config.get('spk', None)
    yield accumulated_output, gr.Textbox.update(value=spk_name)
186
-
187
def clear_output():
    """Return a textbox update that replaces the current text with a notice."""
    cleared_notice = "Cleared!>_<"
    return gr.Textbox.update(value=cleared_notice)
189
-
190
def config_fn(log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem):
    """Write the training settings from the UI into ``configs/config.json``.

    Args:
        log_interval, eval_interval, keep_ckpts, batch_size: Values
            convertible with ``int()``.
        lr: Value convertible with ``float()``; stored as ``learning_rate``.
        fp16_run, all_in_mem: Stored unchanged.

    Returns:
        A confirmation message once the file has been rewritten.

    Raises:
        ValueError: If a numeric field cannot be converted. The conversion
            happens before the file is opened, so the config is untouched.
    """
    train_settings = {
        'log_interval': int(log_interval),
        'eval_interval': int(eval_interval),
        'keep_ckpts': int(keep_ckpts),
        'batch_size': int(batch_size),
        'learning_rate': float(lr),
        'fp16_run': fp16_run,
        'all_in_mem': all_in_mem,
    }
    # Build the path portably: a backslash-separated literal only resolves
    # on Windows.
    config_origin = os.path.join("configs", "config.json")
    with open(config_origin, 'r') as config_file:
        config_data = json.load(config_file)
    config_data['train'].update(train_settings)
    with open(config_origin, 'w') as config_file:
        json.dump(config_data, config_file, indent=4)
    return "配置文件写入完成"
204
-
205
- #def next_backup_folder_number(backup_path):
206
- # numbers = [int(folder) for folder in os.listdir(backup_path) if folder.isdigit()]
207
- # return max(numbers) + 1 if numbers else 1
208
-
209
def _dataset_problem():
    """Return a message explaining why ``dataset/44k`` cannot be trained on, or None."""
    dataset_path = os.path.join("dataset", "44k")
    # A missing folder is reported the same way as an empty one instead of
    # letting os.listdir raise.
    if not os.path.isdir(dataset_path) or not os.listdir(dataset_path):
        return "数据集不存在,请检查dataset文件夹"
    has_features = any(
        name.endswith(('.npy', '.pt'))
        for _root, _dirs, names in os.walk(dataset_path)
        for name in names
    )
    if not has_features:
        return "数据集中未检测到f0和hubert文件,可能是预训练未完成"
    return None


def _launch_training(gpu_selection):
    """Start train.py in a new console window and return the status message."""
    cmd = r"set CUDA_VISIBLE_DEVICES=%s && .\workenv\python.exe train.py -c configs/config.json -m 44k" % (gpu_selection)
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", cmd])
    return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"


def training(gpu_selection):
    """Back up ``logs/44k``, install the pre-trained weights and start training.

    Args:
        gpu_selection: Text placed into ``CUDA_VISIBLE_DEVICES`` for the
            training process.

    Returns:
        A status message: either the reason training could not be started or
        the notice that it is running in a new console window.
    """
    problem = _dataset_problem()
    if problem:
        return problem

    logs_44k = "logs/44k"
    pre_trained_model = "pre_trained_model"
    models_backup = "models_backup"

    d_0_path = os.path.join(pre_trained_model, "D_0.pth")
    g_0_path = os.path.join(pre_trained_model, "G_0.pth")
    # Verify the pre-trained weights before touching logs/44k, so a missing
    # file cannot leave the folder emptied with nothing to train from.
    if not (os.path.isfile(d_0_path) and os.path.isfile(g_0_path)):
        print("D_0.pth and/or G_0.pth are missing in pre_trained_model")
        return "预训练模型缺失,请检查pre_trained_model文件夹中的D_0.pth和G_0.pth"
    print("D_0.pth and G_0.pth exist in pre_trained_model")

    # Back up the current contents of logs/44k into a timestamped folder.
    timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')
    new_backup_folder = os.path.join(models_backup, str(timestamp))
    os.makedirs(new_backup_folder, exist_ok=True)
    os.makedirs(logs_44k, exist_ok=True)
    for file in os.listdir(logs_44k):
        shutil.move(os.path.join(logs_44k, file), os.path.join(new_backup_folder, file))

    shutil.copy(d_0_path, os.path.join(logs_44k, "D_0.pth"))
    shutil.copy(g_0_path, os.path.join(logs_44k, "G_0.pth"))
    return _launch_training(gpu_selection)


def continue_training(gpu_selection):
    """Resume training from whatever is currently in ``logs/44k``.

    Args:
        gpu_selection: Text placed into ``CUDA_VISIBLE_DEVICES`` for the
            training process.

    Returns:
        A status message: either the reason training could not be started or
        the notice that it is running in a new console window.
    """
    problem = _dataset_problem()
    if problem:
        return problem
    return _launch_training(gpu_selection)
258
-
259
def continue_selected_training(work_dir):
    """Restore a saved work folder from the backup root into ``logs/44k``.

    Args:
        work_dir: Name of a folder below ``models_backup_path``, or ``None``
            when nothing was selected.

    Returns:
        A status message (always a plain string).
    """
    print(work_dir)
    if work_dir is None:
        return "你没有选择工作进度"
    work_dir_path = os.path.join(models_backup_path, work_dir)
    if not os.path.exists(work_dir_path):
        # Returned as a string; a trailing comma here would turn the result
        # into a one-element tuple.
        return "该工作文件夹不存在"

    logs_44k_path = os.path.join("logs", "44k")
    logs_44k_files = os.listdir(logs_44k_path)
    d0_path = os.path.join(logs_44k_path, "D_0.pth")
    g0_path = os.path.join(logs_44k_path, "G_0.pth")
    if len(logs_44k_files) == 2 and os.path.isfile(d0_path) and os.path.isfile(g0_path):
        # Only the two initial weight files are present: drop them without
        # creating a backup.
        os.remove(d0_path)
        os.remove(g0_path)
    elif logs_44k_files:
        # Anything else is copied into a timestamped backup folder first.
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')
        new_backup_folder = os.path.join(models_backup_path, timestamp)
        # exist_ok: a second call within the same minute reuses the folder.
        os.makedirs(new_backup_folder, exist_ok=True)
        for file in logs_44k_files:
            shutil.copy(os.path.join(logs_44k_path, file), new_backup_folder)

    for file in os.listdir(work_dir_path):
        shutil.copy(os.path.join(work_dir_path, file), logs_44k_path)

    # NOTE(review): the message says training has started, but no process is
    # launched in this function - confirm whether a launch step is missing.
    return "已经在新的终端窗口开始训练,请监看终端窗口的训练日志。在终端中按Ctrl+C可暂停训练。"
286
-
287
def previous_selection_refresh():
    """Return a dropdown update listing the folders below ``models_backup_path``.

    The same root is used for listing and for the directory check. A missing
    backup folder yields an empty choice list instead of an exception.
    """
    if not os.path.isdir(models_backup_path):
        return gr.Dropdown.update(choices=[])
    work_saved_list = [
        entry for entry in os.listdir(models_backup_path)
        if os.path.isdir(os.path.join(models_backup_path, entry))
    ]
    return gr.Dropdown.update(choices=work_saved_list)
294
-
295
-
296
def kmeans_training():
    """Start cluster-model training in a new console window.

    Returns:
        A status message: the "dataset missing" notice when ``dataset/44k``
        is absent or empty, otherwise the notice that training was started.
    """
    dataset_path = os.path.join("dataset", "44k")
    # A missing folder is reported the same way as an empty one instead of
    # letting os.listdir raise.
    if not os.path.isdir(dataset_path) or not os.listdir(dataset_path):
        return "数据集不存在,请检查dataset文件夹"
    subprocess.Popen(["cmd", "/c", "start", "cmd", "/k", r".\workenv\python.exe cluster\train_cluster.py"])
    return "已经在新的终端窗口开始训练,训练聚类模型不会输出日志,检查任务管理器中python进程有在占用CPU就是正在训练,训练一般需要5-10分钟左右"
301
-
302
# Scan logs/44k once at start-up to seed the checkpoint and cluster dropdowns.
file_list = os.listdir("logs/44k")
# Selectable checkpoints: .pth files whose name neither starts with "k" nor
# carries the "D_" prefix.
ckpt_list = [
    name for name in file_list
    if os.path.splitext(name)[-1] == ".pth" and name[0] != "k" and name[:2] != "D_"
]
# Anything starting with "k" is offered as a cluster model; a placeholder is
# shown when there is none.
cluster_list = [name for name in file_list if name[0] == "k"]
if not cluster_list:
    cluster_list = ["你没有聚类模型"]
313
-
314
# Read GPU info: collect the CUDA devices whose name matches the allow-list.
ngpu = torch.cuda.device_count()
gpu_infos = []
usable_gpu_ids = []
if torch.cuda.is_available() and ngpu > 0:
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        # Names containing "16" or "MX" are skipped outright.
        if "16" in gpu_name or "MX" in gpu_name:
            continue
        upper_name = gpu_name.upper()
        # Substring allow-list (covers e.g. A10, A100, V100, A40, P40, M40, K80).
        if (
            any(tag in gpu_name for tag in ("10", "20", "30", "40", "70", "80", "90", "M4", "T4"))
            or "A50" in upper_name
            or "TITAN" in upper_name
        ):
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            usable_gpu_ids.append(str(i))
# True when at least one usable NVIDIA card was found.
if_gpu_ok = bool(gpu_infos)
gpu_info = "\n".join(gpu_infos) if if_gpu_ok else "很遗憾您这没有能用的显卡来支持您训练"
# Join the full device indices; taking the first character of each info line
# would truncate indices of 10 and above.
gpus = "-".join(usable_gpu_ids)
328
-
329
# Collect the previously saved training runs: every sub-folder found in the
# backup directory.
work_saved_list = [
    entry for entry in os.listdir("models_backup")
    if os.path.isdir(os.path.join(models_backup_path, entry))
]
335
 
336
  app = gr.Blocks()
337
  with app:
338
- gr.Markdown(value="""
339
- ###sovits4.0 webui 推理&训练
340
-
341
- 修改自原项目及bilibili@麦哲云
342
-
343
- 仅供个人娱乐和非商业用途,禁止用于血腥、暴力、性相关、政治相关内容
344
-
345
- 作者:bilibili@羽毛布団
346
-
347
- """)
348
  with gr.Tabs():
349
- with gr.TabItem("推理"):
350
- choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
351
- config_choice = gr.Dropdown(label="配置文件", choices=os.listdir("configs"), value="no_config")
352
- cluster_choice = gr.Dropdown(label="选择聚类模型", choices=cluster_list, value="no_clu")
353
- enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
354
- refresh = gr.Button("刷新选项")
355
- loadckpt = gr.Button("加载模型", variant="primary")
356
-
357
- sid = gr.Dropdown(label="音色", value="speaker0")
358
- model_message = gr.Textbox(label="Output Message")
359
-
360
- refresh.click(load_options,[],[choice_ckpt,config_choice,cluster_choice])
361
- loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance],[model_message, sid])
362
-
363
  gr.Markdown(value="""
364
- 请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
 
 
365
  """)
366
-
367
- vc_input3 = gr.Audio(label="上传音频")
 
368
  vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
369
  cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0)
370
  auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False)
371
- F0_mean_pooling = gr.Checkbox(label="F0均值滤波(池化),开启后可能有效改善哑音(对因和声混响造成的哑音无效)。", value=False)
372
- enhancer_adaptive_key = gr.Number(label="使NSF-HIFIGAN增强器适应更高的音域(单位为半音数)|默认为0", value=0,interactive=True)
373
- slice_db = gr.Number(label="切片阈值", value=-40)
374
  noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4)
375
- cl_num = gr.Number(label="音频自动切片,0为不切片,单位为秒/s", value=0)
376
- pad_seconds = gr.Number(label="推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现", value=0.5)
377
- lg_num = gr.Number(label="两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,注意,该设置会影响推理速度,单位为秒/s", value=0)
378
- lgr_num = gr.Number(label="自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭", value=0.75,interactive=True)
379
-
380
  vc_submit = gr.Button("转换", variant="primary")
381
  vc_output1 = gr.Textbox(label="Output Message")
382
  vc_output2 = gr.Audio(label="Output Audio")
 
383
 
384
- vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,F0_mean_pooling,enhancer_adaptive_key], [vc_output1, vc_output2])
385
-
386
- with gr.TabItem("训练"):
387
- gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
388
- raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
389
- get_raw_dirs=gr.Button("识别数据集", variant="primary")
390
- gr.Markdown(value="""确认数据集正确识别后请点击数据预处理(大数据集可能会花上很长时间预处理,没报错等着就行)""")
391
- #with gr.Row():
392
- raw_preprocess=gr.Button("数据预处理", variant="primary")
393
- preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
394
- clear_preprocess_output=gr.Button("清空输出信息")
395
- with gr.Group():
396
- gr.Markdown(value="""填写训练设置和超参数""")
397
- with gr.Row():
398
- gr.Textbox(label="当前使用显卡信息", value=gpu_info)
399
- gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
400
- with gr.Row():
401
- log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value="200")
402
- eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value="800")
403
- keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value="10")
404
- with gr.Row():
405
- batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch可以加快训练但显著增加显存占用。6G显存建议设定为4", value="12")
406
- lr=gr.Textbox(label="学习率,尽量与batch size成正比(6:0.0001),无法整除的话四舍五入一下也行", value="0.0002")
407
- fp16_run=gr.Checkbox(label="是否使用半精度训练,半精度训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=False)
408
- all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用", value=False)
409
- with gr.Row():
410
- gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
411
- speakers=gr.Textbox(label="说话人列表")
412
- write_config=gr.Button("写入配置文件", variant="primary")
413
-
414
- write_config_output=gr.Textbox(label="写入配置文件输出信息")
415
-
416
- gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
417
- **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
418
- """)
419
- with gr.Row():
420
- with gr.Column():
421
- start_training=gr.Button("从头开始训练", variant="primary")
422
- training_output=gr.Textbox(label="训练输出信息")
423
- with gr.Column():
424
- continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
425
- continue_training_output=gr.Textbox(label="训练输出信息")
426
- with gr.Column():
427
- kmeans_button=gr.Button("训练聚类模型", variant="primary")
428
- kmeans_output=gr.Textbox(label="训练输出信息")
429
- #previous_selection_training_btn=gr.Button("继续训练已保存的工作", variant="primary")
430
- #with gr.Row():
431
- # select_previous_work=gr.Dropdown(label="选择已保存的工作进度", choices=work_saved_list)
432
- # previous_selection_refresh_btn=gr.Button("刷新列表", variant="primary")
433
- #previous_selection_output=gr.Textbox(label="训练输出信息")
434
-
435
-
436
- get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
437
- raw_preprocess.click(dataset_preprocess,[],[preprocess_output, speakers])
438
- clear_preprocess_output.click(clear_output,[],[preprocess_output])
439
- write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem],[write_config_output])
440
- start_training.click(training,[gpu_selection],[training_output])
441
- continue_training_btn.click(continue_training,[gpu_selection],[continue_training_output])
442
- #previous_selection_training_btn.click(continue_selected_training,[select_previous_work],[previous_selection_output])
443
- #previous_selection_refresh_btn.click(previous_selection_refresh,[],[select_previous_work])
444
- kmeans_button.click(kmeans_training,[],[kmeans_output])
445
-
446
- app.queue(concurrency_count=1022, max_size=2044).launch(server_name="127.0.0.1",inbrowser=True,quiet=True)
 
1
  import io
2
  import os
3
+
4
# Fetch the HuBERT checkpoint at start-up, but only when it is not already on
# disk. The "resolve" endpoint serves the raw file; the "blob" endpoint
# returns the HTML file-viewer page instead of the checkpoint.
_HUBERT_CKPT = os.path.join("hubert", "checkpoint_best_legacy_500.pt")
if not os.path.exists(_HUBERT_CKPT):
    os.system("wget -P hubert/ https://huggingface.co/spaces/MarcusSu1216/XingTong/resolve/main/hubert/checkpoint_best_legacy_500.pt")
 
5
  import gradio as gr
 
6
  import librosa
7
  import numpy as np
8
+ import soundfile
9
  from inference.infer_tool import Svc
10
  import logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  logging.getLogger('numba').setLevel(logging.WARNING)
13
  logging.getLogger('markdown_it').setLevel(logging.WARNING)
14
  logging.getLogger('urllib3').setLevel(logging.WARNING)
15
  logging.getLogger('matplotlib').setLevel(logging.WARNING)
16
 
17
+ model = Svc("logs/44k/G_32000.pth", "configs/config.json", cluster_model_path="logs/44k/kmeans_10000.pt")
18
+
19
+
20
+
21
def vc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, noise_scale):
    """Convert an uploaded clip with the module-level ``model``.

    Args:
        sid: Speaker name passed to ``model.infer``.
        input_audio: ``(sampling_rate, samples)`` pair, or ``None`` when
            nothing was uploaded.
        vc_transform: Passed to ``model.infer`` as the second argument.
        auto_f0: Passed to ``model.infer`` as ``auto_predict_f0``.
        cluster_ratio: Passed to ``model.infer`` as ``cluster_infer_ratio``.
        noise_scale: Passed to ``model.infer`` as ``noice_scale``.

    Returns:
        ``(message, audio)`` where ``audio`` is ``(sample_rate, samples)`` on
        success and ``None`` when the input is missing or longer than 45 s.
    """
    import tempfile

    if input_audio is None:
        return "You need to upload an audio", None
    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > 45:
        return "请上传小于45s的音频,需要转换长音频请本地进行转换", None
    if np.issubdtype(audio.dtype, np.integer):
        # Scale integer PCM by the dtype's maximum value.
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        # np.iinfo rejects float dtypes.
        # NOTE(review): assumes float input is already normalised.
        audio = audio.astype(np.float32)
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    print(audio.shape)

    # Use a unique scratch file per request so concurrent conversions cannot
    # overwrite each other's input, and remove it afterwards.
    fd, out_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        soundfile.write(out_wav_path, audio, 16000, format="wav")
        print(cluster_ratio, auto_f0, noise_scale)
        out_audio, _ = model.infer(sid, vc_transform, out_wav_path,
                                   cluster_infer_ratio=cluster_ratio,
                                   auto_predict_f0=auto_f0,
                                   noice_scale=noise_scale
                                   )
    finally:
        os.remove(out_wav_path)

    # Prefer the model's own output rate when it exposes one; 44100 stays as
    # the fallback.
    sample_rate = getattr(model, "target_sample", 44100)
    # Move the result to host memory before converting; ``.numpy()`` alone
    # fails for tensors that live on an accelerator.
    return "Success", (sample_rate, out_audio.detach().cpu().numpy())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  app = gr.Blocks()
47
  with app:
 
 
 
 
 
 
 
 
 
 
48
  with gr.Tabs():
49
+ with gr.TabItem("Basic"):
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  gr.Markdown(value="""
51
+ SoVITS 4.0 在线 demo,基于 https://github.com/innnky/so-vits-svc/tree/4.0
52
+
53
+ 此 demo 为预训练底模在线 demo,使用数据:星瞳
54
  """)
55
+ spks = list(model.spk2id.keys())
56
+ sid = gr.Dropdown(label="音色", choices=["mqnl-covers-v3"], value="mqnl-covers-v3")
57
+ vc_input3 = gr.Audio(label="上传音频(长度小于45秒)")
58
  vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0)
59
  cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0)
60
  auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False)
 
 
 
61
  noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4)
 
 
 
 
 
62
  vc_submit = gr.Button("转换", variant="primary")
63
  vc_output1 = gr.Textbox(label="Output Message")
64
  vc_output2 = gr.Audio(label="Output Audio")
65
+ vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, noise_scale], [vc_output1, vc_output2])
66
 
67
+ app.launch()