JacobLinCool committed on
Commit
38548f2
1 Parent(s): 3a010aa

feat: better ui

Browse files
Files changed (9) hide show
  1. app.py +30 -436
  2. app/__init__.py +0 -0
  3. app/export.py +79 -0
  4. app/extract.py +64 -0
  5. app/infer.py +110 -0
  6. app/setup.py +110 -0
  7. app/train.py +191 -0
  8. configs/config.py +1 -1
  9. infer/modules/vc/modules.py +4 -4
app.py CHANGED
@@ -1,459 +1,53 @@
1
- from typing import Tuple
2
  from prelude import prelude
3
 
4
  prelude()
5
 
6
- import os
7
- import traceback
8
- import numpy as np
9
- from sklearn.cluster import MiniBatchKMeans
10
- from random import shuffle
11
  import gradio as gr
12
- import zipfile
13
- import tempfile
14
- import shutil
15
- import faiss
16
- from glob import glob
17
- from infer.modules.train.preprocess import PreProcess
18
- from infer.modules.train.extract.extract_f0_rmvpe import FeatureInput
19
- from infer.modules.train.extract_feature_print import HubertFeatureExtractor
20
- from infer.modules.train.train import train
21
- from infer.lib.train.process_ckpt import extract_small_model
22
- from infer.modules.vc.modules import VC
23
- from configs.config import Config
24
- import demucs.separate
25
- import soundfile as sf
26
- from zero import zero
27
- from model import device
28
 
29
 
30
- def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
31
- with zipfile.ZipFile(zip_file, "r") as zip_ref:
32
- zip_ref.extractall(target_dir)
33
-
34
- audio_files = [
35
- os.path.join(target_dir, f)
36
- for f in os.listdir(target_dir)
37
- if f.endswith((".wav", ".mp3", ".ogg"))
38
- ]
39
- if not audio_files:
40
- raise gr.Error("No audio files found at the top level of the zip file")
41
-
42
- return audio_files
43
-
44
-
45
- def preprocess(zip_file: str) -> str:
46
- temp_dir = tempfile.mkdtemp()
47
- print(f"Using exp dir: {temp_dir}")
48
-
49
- data_dir = os.path.join(temp_dir, "_data")
50
- os.makedirs(data_dir)
51
- audio_files = extract_audio_files(zip_file, data_dir)
52
-
53
- pp = PreProcess(40000, temp_dir, 3.0, False)
54
- pp.pipeline_mp_inp_dir(data_dir, 4)
55
-
56
- pp.logfile.seek(0)
57
- log = pp.logfile.read()
58
-
59
- return temp_dir, f"Preprocessed {len(audio_files)} audio files.\n{log}"
60
-
61
-
62
- @zero(duration=300)
63
- def extract_features(exp_dir: str) -> str:
64
- err = None
65
- fi = FeatureInput(exp_dir)
66
- try:
67
- fi.run()
68
- except Exception as e:
69
- err = e
70
-
71
- fi.logfile.seek(0)
72
- log = fi.logfile.read()
73
-
74
- if err:
75
- log = f"Error: {err}\n{log}"
76
- return log
77
-
78
- hfe = HubertFeatureExtractor(exp_dir)
79
- try:
80
- hfe.run()
81
- except Exception as e:
82
- err = e
83
-
84
- hfe.logfile.seek(0)
85
- log += hfe.logfile.read()
86
-
87
- if err:
88
- log = f"Error: {err}\n{log}"
89
-
90
- return log
91
-
92
-
93
- def write_filelist(exp_dir: str) -> None:
94
- if_f0_3 = True
95
- spk_id5 = 0
96
- gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
97
- feature_dir = "%s/3_feature768" % (exp_dir)
98
-
99
- if if_f0_3:
100
- f0_dir = "%s/2a_f0" % (exp_dir)
101
- f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
102
- names = (
103
- set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
104
- & set([name.split(".")[0] for name in os.listdir(feature_dir)])
105
- & set([name.split(".")[0] for name in os.listdir(f0_dir)])
106
- & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
107
- )
108
- else:
109
- names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
110
- [name.split(".")[0] for name in os.listdir(feature_dir)]
111
- )
112
- opt = []
113
- for name in names:
114
- if if_f0_3:
115
- opt.append(
116
- "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
117
- % (
118
- gt_wavs_dir.replace("\\", "\\\\"),
119
- name,
120
- feature_dir.replace("\\", "\\\\"),
121
- name,
122
- f0_dir.replace("\\", "\\\\"),
123
- name,
124
- f0nsf_dir.replace("\\", "\\\\"),
125
- name,
126
- spk_id5,
127
- )
128
- )
129
- else:
130
- opt.append(
131
- "%s/%s.wav|%s/%s.npy|%s"
132
- % (
133
- gt_wavs_dir.replace("\\", "\\\\"),
134
- name,
135
- feature_dir.replace("\\", "\\\\"),
136
- name,
137
- spk_id5,
138
- )
139
- )
140
- fea_dim = 768
141
-
142
- now_dir = os.getcwd()
143
- sr2 = "40k"
144
- if if_f0_3:
145
- for _ in range(2):
146
- opt.append(
147
- "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
148
- % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
149
- )
150
- else:
151
- for _ in range(2):
152
- opt.append(
153
- "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
154
- % (now_dir, sr2, now_dir, fea_dim, spk_id5)
155
- )
156
- shuffle(opt)
157
- with open("%s/filelist.txt" % exp_dir, "w") as f:
158
- f.write("\n".join(opt))
159
-
160
-
161
- @zero(duration=300)
162
- def train_model(exp_dir: str) -> str:
163
- shutil.copy("config.json", exp_dir)
164
- write_filelist(exp_dir)
165
- train(exp_dir)
166
-
167
- models = glob(f"{exp_dir}/G_*.pth")
168
- print(models)
169
- if not models:
170
- raise gr.Error("No model found")
171
-
172
- latest_model = max(models, key=os.path.getctime)
173
- return latest_model
174
-
175
-
176
- def download_weight(exp_dir: str) -> str:
177
- models = glob(f"{exp_dir}/G_*.pth")
178
- if not models:
179
- raise gr.Error("No model found")
180
-
181
- latest_model = max(models, key=os.path.getctime)
182
- print(f"Latest model: {latest_model}")
183
-
184
- name = os.path.basename(exp_dir)
185
- out = os.path.join(exp_dir, f"{name}.pth")
186
- extract_small_model(
187
- latest_model, out, "40k", True, "Model trained by ZeroGPU.", "v2"
188
- )
189
-
190
- return out
191
-
192
-
193
- def train_index(exp_dir: str) -> str:
194
- feature_dir = "%s/3_feature768" % (exp_dir)
195
- if not os.path.exists(feature_dir):
196
- raise gr.Error("Please extract features first.")
197
- listdir_res = list(os.listdir(feature_dir))
198
- if len(listdir_res) == 0:
199
- raise gr.Error("Please extract features first.")
200
- npys = []
201
- for name in sorted(listdir_res):
202
- phone = np.load("%s/%s" % (feature_dir, name))
203
- npys.append(phone)
204
- big_npy = np.concatenate(npys, 0)
205
- big_npy_idx = np.arange(big_npy.shape[0])
206
- np.random.shuffle(big_npy_idx)
207
- big_npy = big_npy[big_npy_idx]
208
- if big_npy.shape[0] > 2e5:
209
- print("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
210
- try:
211
- big_npy = (
212
- MiniBatchKMeans(
213
- n_clusters=10000,
214
- verbose=True,
215
- batch_size=256 * 8,
216
- compute_labels=False,
217
- init="random",
218
- )
219
- .fit(big_npy)
220
- .cluster_centers_
221
- )
222
- except:
223
- info = traceback.format_exc()
224
- print(info)
225
- raise gr.Error(info)
226
-
227
- np.save("%s/total_fea.npy" % exp_dir, big_npy)
228
- n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
229
- print("%s,%s" % (big_npy.shape, n_ivf))
230
- index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf)
231
- # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
232
- print("training")
233
- index_ivf = faiss.extract_index_ivf(index) #
234
- index_ivf.nprobe = 1
235
- index.train(big_npy)
236
- faiss.write_index(
237
- index,
238
- "%s/trained_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
239
- )
240
- print("adding")
241
- batch_size_add = 8192
242
- for i in range(0, big_npy.shape[0], batch_size_add):
243
- index.add(big_npy[i : i + batch_size_add])
244
- faiss.write_index(
245
- index,
246
- "%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
247
- )
248
- print("built added_IVF%s_Flat_nprobe_%s.index" % (n_ivf, index_ivf.nprobe))
249
-
250
- return "%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe)
251
-
252
-
253
- def download_expdir(exp_dir: str) -> str:
254
- shutil.make_archive(exp_dir, "zip", exp_dir)
255
- return f"{exp_dir}.zip"
256
-
257
-
258
- def restore_expdir(zip: str) -> str:
259
- exp_dir = tempfile.mkdtemp()
260
- shutil.unpack_archive(zip, exp_dir)
261
- return exp_dir
262
-
263
-
264
- @zero(duration=120)
265
- def infer(exp_dir: str, original_audio: str, f0add: int) -> Tuple[int, np.ndarray]:
266
- name = os.path.basename(exp_dir)
267
- model = os.path.join(exp_dir, f"{name}.pth")
268
- if not os.path.exists(model):
269
- raise gr.Error("Model not found")
270
-
271
- index = glob(f"{exp_dir}/added_*.index")
272
- if not index:
273
- raise gr.Error("Index not found")
274
-
275
- base = os.path.basename(original_audio)
276
- base = os.path.splitext(base)[0]
277
- demucs.separate.main(
278
- ["--two-stems", "vocals", "-d", str(device), "-n", "htdemucs", original_audio]
279
- )
280
- out = os.path.join("separated", "htdemucs", base, "vocals.wav")
281
-
282
- cfg = Config()
283
- vc = VC(cfg)
284
- vc.get_vc(model)
285
- _, wav_opt = vc.vc_single(
286
- 0,
287
- out,
288
- f0add,
289
- None,
290
- "rmvpe",
291
- index,
292
- None,
293
- 0.5,
294
- 3,
295
- 0,
296
- 1,
297
- 0.33,
298
- )
299
-
300
- sr = wav_opt[0]
301
- data = wav_opt[1]
302
-
303
- return sr, data
304
-
305
-
306
- def merge(exp_dir: str, original_audio: str, vocal: Tuple[int, np.ndarray]) -> str:
307
- base = os.path.basename(original_audio)
308
- base = os.path.splitext(base)[0]
309
- music = os.path.join("separated", "htdemucs", base, "no-vocals.wav")
310
-
311
- tmp = os.path.join(exp_dir, "tmp.wav")
312
- sf.write(tmp, vocal[1], vocal[0])
313
-
314
- os.system(
315
- f"ffmpeg -i {music} -i {tmp} -filter_complex '[1]volume=2[a];[0][a]amix=inputs=2:duration=first:dropout_transition=2' {tmp}.merged.mp3"
316
  )
317
 
318
- return f"{tmp}.merged.mp3"
319
-
320
-
321
- with gr.Blocks() as app:
322
- # allow user to manually select the experiment directory
323
  exp_dir = gr.Textbox(
324
- label="Experiment directory (don't touch it unless you know what you are doing)",
325
  visible=True,
326
- interactive=True,
327
  )
328
 
329
- with gr.Tabs():
330
- with gr.Tab(label="New / Restore"):
331
- with gr.Row():
332
- with gr.Column():
333
- zip_file = gr.File(
334
- label="Upload a zip file containing audio files for training",
335
- file_types=["zip"],
336
- )
337
- preprocess_output = gr.Textbox(
338
- label="Preprocessing output", lines=5
339
- )
340
-
341
- preprocess_btn = gr.Button(
342
- value="Start New Experiment", variant="primary"
343
- )
344
 
345
- with gr.Row():
346
- restore_zip_file = gr.File(
347
- label="Upload the experiment directory zip file",
348
- file_types=["zip"],
349
- )
350
- restore_btn = gr.Button(value="Restore Experiment", variant="primary")
351
 
352
- with gr.Tab(label="Extract features"):
353
- with gr.Row():
354
- extract_features_btn = gr.Button(
355
- value="Extract features", variant="primary"
356
- )
357
- with gr.Row():
358
- extract_features_output = gr.Textbox(
359
- label="Feature extraction output", lines=10
360
- )
361
 
362
- with gr.Tab(label="Train"):
363
- with gr.Row():
364
- train_btn = gr.Button(value="Train", variant="primary")
365
- latest_model = gr.File(label="Latest checkpoint")
366
- with gr.Row():
367
- train_index_btn = gr.Button(value="Train index", variant="primary")
368
- trained_index = gr.File(label="Trained index")
369
 
370
  with gr.Tab(label="Download"):
371
- with gr.Row():
372
- download_weight_btn = gr.Button(
373
- value="Download latest model", variant="primary"
374
- )
375
- download_weight_output = gr.File(label="Download latest model")
376
-
377
- with gr.Row():
378
- download_expdir_btn = gr.Button(
379
- value="Download experiment directory", variant="primary"
380
- )
381
- download_expdir_output = gr.File(label="Download experiment directory")
382
 
383
  with gr.Tab(label="Inference"):
384
- with gr.Row():
385
- original_audio = gr.Audio(
386
- label="Upload original audio",
387
- type="filepath",
388
- show_download_button=True,
389
- )
390
- f0add = gr.Slider(
391
- label="F0 add",
392
- minimum=-16,
393
- maximum=16,
394
- step=1,
395
- value=0,
396
- )
397
- infer_btn = gr.Button(value="Infer", variant="primary")
398
- with gr.Row():
399
- infer_output = gr.Audio(label="Inferred audio")
400
- with gr.Row():
401
- merge_output = gr.Audio(label="Merged audio")
402
-
403
- preprocess_btn.click(
404
- fn=preprocess,
405
- inputs=[zip_file],
406
- outputs=[exp_dir, preprocess_output],
407
- )
408
-
409
- extract_features_btn.click(
410
- fn=extract_features,
411
- inputs=[exp_dir],
412
- outputs=[extract_features_output],
413
- )
414
-
415
- train_btn.click(
416
- fn=train_model,
417
- inputs=[exp_dir],
418
- outputs=[latest_model],
419
- ).success(
420
- fn=train_model,
421
- inputs=[exp_dir],
422
- outputs=[latest_model],
423
- )
424
 
425
- train_index_btn.click(
426
- fn=train_index,
427
- inputs=[exp_dir],
428
- outputs=[trained_index],
429
- )
430
-
431
- download_weight_btn.click(
432
- fn=download_weight,
433
- inputs=[exp_dir],
434
- outputs=[download_weight_output],
435
- )
436
-
437
- download_expdir_btn.click(
438
- fn=download_expdir,
439
- inputs=[exp_dir],
440
- outputs=[download_expdir_output],
441
- )
442
-
443
- restore_btn.click(
444
- fn=restore_expdir,
445
- inputs=[restore_zip_file],
446
- outputs=[exp_dir],
447
- )
448
-
449
- infer_btn.click(
450
- fn=infer,
451
- inputs=[exp_dir, original_audio, f0add],
452
- outputs=[infer_output],
453
- ).success(
454
- fn=merge,
455
- inputs=[exp_dir, original_audio, infer_output],
456
- outputs=[merge_output],
457
- )
458
 
459
  app.launch()
 
 
1
  from prelude import prelude
2
 
3
  prelude()
4
 
 
 
 
 
 
5
  import gradio as gr
6
+ from app.setup import SetupTab
7
+ from app.extract import FeatureExtractionTab
8
+ from app.train import TrainTab
9
+ from app.export import ExportTab
10
+ from app.infer import InferenceTab
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
+ with gr.Blocks() as app:
14
+ gr.Markdown("# ZeroRVC")
15
+ gr.Markdown(
16
+ "Run Retrieval-based Voice Conversion training and inference on HuggingFace ZeroGPU."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  )
18
 
 
 
 
 
 
19
  exp_dir = gr.Textbox(
20
+ label="Experiment directory",
21
  visible=True,
22
+ interactive=False,
23
  )
24
 
25
+ setup = SetupTab()
26
+ feature_extraction = FeatureExtractionTab()
27
+ training = TrainTab()
28
+ export = ExportTab()
29
+ inferencing = InferenceTab()
 
 
 
 
 
 
 
 
 
 
30
 
31
+ with gr.Tabs():
32
+ with gr.Tab(label="Setup"):
33
+ setup.ui()
 
 
 
34
 
35
+ with gr.Tab(label="Feature Extraction"):
36
+ feature_extraction.ui()
 
 
 
 
 
 
 
37
 
38
+ with gr.Tab(label="Training"):
39
+ training.ui()
 
 
 
 
 
40
 
41
  with gr.Tab(label="Download"):
42
+ export.ui()
 
 
 
 
 
 
 
 
 
 
43
 
44
  with gr.Tab(label="Inference"):
45
+ inferencing.ui()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ setup.build(exp_dir)
48
+ feature_extraction.build(exp_dir)
49
+ training.build(exp_dir)
50
+ export.build(exp_dir)
51
+ inferencing.build(exp_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  app.launch()
app/__init__.py ADDED
File without changes
app/export.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from glob import glob
2
+ import os
3
+ import shutil
4
+ import gradio as gr
5
+ from infer.lib.train.process_ckpt import extract_small_model
6
+
7
+
8
+ def download_weight(exp_dir: str) -> str:
9
+ models = glob(f"{exp_dir}/G_*.pth")
10
+ if not models:
11
+ raise gr.Error("No model found")
12
+
13
+ latest_model = max(models, key=os.path.getctime)
14
+ print(f"Latest model: {latest_model}")
15
+
16
+ name = os.path.basename(exp_dir)
17
+ out = os.path.join(exp_dir, f"{name}.pth")
18
+ extract_small_model(
19
+ latest_model, out, "40k", True, "Model trained by ZeroGPU.", "v2"
20
+ )
21
+
22
+ return out
23
+
24
+
25
+ def download_expdir(exp_dir: str) -> str:
26
+ shutil.make_archive(exp_dir, "zip", exp_dir)
27
+ return f"{exp_dir}.zip"
28
+
29
+
30
+ def remove_expdir(exp_dir: str) -> str:
31
+ shutil.rmtree(exp_dir)
32
+ return ""
33
+
34
+
35
+ class ExportTab:
36
+ def __init__(self):
37
+ pass
38
+
39
+ def ui(self):
40
+ gr.Markdown("# Download Model or Experiment Directory")
41
+ gr.Markdown(
42
+ "You can download the latest model or the entire experiment directory here."
43
+ )
44
+
45
+ with gr.Row():
46
+ self.download_weight_btn = gr.Button(
47
+ value="Latest model (for inferencing)", variant="primary"
48
+ )
49
+ self.download_weight_output = gr.File(label="Prune latest model")
50
+
51
+ with gr.Row():
52
+ self.download_expdir_btn = gr.Button(
53
+ value="Download experiment directory", variant="primary"
54
+ )
55
+ self.download_expdir_output = gr.File(label="Archive experiment directory")
56
+
57
+ with gr.Row():
58
+ self.remove_expdir_btn = gr.Button(
59
+ value="REMOVE experiment directory", variant="stop"
60
+ )
61
+
62
+ def build(self, exp_dir: gr.Textbox):
63
+ self.download_weight_btn.click(
64
+ fn=download_weight,
65
+ inputs=[exp_dir],
66
+ outputs=[self.download_weight_output],
67
+ )
68
+
69
+ self.download_expdir_btn.click(
70
+ fn=download_expdir,
71
+ inputs=[exp_dir],
72
+ outputs=[self.download_expdir_output],
73
+ )
74
+
75
+ self.remove_expdir_btn.click(
76
+ fn=remove_expdir,
77
+ inputs=[exp_dir],
78
+ outputs=[exp_dir],
79
+ )
app/extract.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from infer.modules.train.extract.extract_f0_rmvpe import FeatureInput
3
+ from infer.modules.train.extract_feature_print import HubertFeatureExtractor
4
+ from zero import zero
5
+
6
+
7
+ @zero(duration=300)
8
+ def extract_features(exp_dir: str) -> str:
9
+ err = None
10
+ fi = FeatureInput(exp_dir)
11
+ try:
12
+ fi.run()
13
+ except Exception as e:
14
+ err = e
15
+
16
+ fi.logfile.seek(0)
17
+ log = fi.logfile.read()
18
+
19
+ if err:
20
+ log = f"Error: {err}\n{log}"
21
+ return log
22
+
23
+ hfe = HubertFeatureExtractor(exp_dir)
24
+ try:
25
+ hfe.run()
26
+ except Exception as e:
27
+ err = e
28
+
29
+ hfe.logfile.seek(0)
30
+ log += hfe.logfile.read()
31
+
32
+ if err:
33
+ log = f"Error: {err}\n{log}"
34
+
35
+ return log
36
+
37
+
38
+ class FeatureExtractionTab:
39
+ def __init__(self):
40
+ pass
41
+
42
+ def ui(self):
43
+ gr.Markdown("# Feature Extraction")
44
+ gr.Markdown(
45
+ "Before training, you need to extract features from the audio files. "
46
+ "This process may take a while, depending on the number of audio files. "
47
+ "Under the hood, this process extracts speech features using HuBERT and extracts F0 by RMVPE."
48
+ )
49
+
50
+ with gr.Row():
51
+ self.extract_features_btn = gr.Button(
52
+ value="Extract features", variant="primary"
53
+ )
54
+ with gr.Row():
55
+ self.extract_features_log = gr.Textbox(
56
+ label="Feature extraction log", lines=10
57
+ )
58
+
59
+ def build(self, exp_dir: gr.Textbox):
60
+ self.extract_features_btn.click(
61
+ fn=extract_features,
62
+ inputs=[exp_dir],
63
+ outputs=[self.extract_features_log],
64
+ )
app/infer.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from glob import glob
2
+ import os
3
+ from typing import Tuple
4
+ import demucs
5
+ import gradio as gr
6
+ import numpy as np
7
+ import soundfile as sf
8
+ from configs.config import Config
9
+ from infer.modules.vc.modules import VC
10
+ from zero import zero
11
+ from model import device
12
+
13
+
14
+ @zero(duration=120)
15
+ def infer(exp_dir: str, original_audio: str, f0add: int) -> Tuple[int, np.ndarray]:
16
+ name = os.path.basename(exp_dir)
17
+ model = os.path.join(exp_dir, f"{name}.pth")
18
+ if not os.path.exists(model):
19
+ raise gr.Error("Model not found")
20
+
21
+ index = glob(f"{exp_dir}/added_*.index")
22
+ if not index:
23
+ raise gr.Error("Index not found")
24
+
25
+ base = os.path.basename(original_audio)
26
+ base = os.path.splitext(base)[0]
27
+ demucs.separate.main(
28
+ ["--two-stems", "vocals", "-d", str(device), "-n", "htdemucs", original_audio]
29
+ )
30
+ out = os.path.join("separated", "htdemucs", base, "vocals.wav")
31
+
32
+ cfg = Config()
33
+ vc = VC(cfg)
34
+ vc.get_vc(model)
35
+ _, wav_opt = vc.vc_single(
36
+ 0,
37
+ out,
38
+ f0add,
39
+ None,
40
+ "rmvpe",
41
+ index,
42
+ None,
43
+ 0.5,
44
+ 3,
45
+ 0,
46
+ 1,
47
+ 0.33,
48
+ )
49
+
50
+ sr = wav_opt[0]
51
+ data = wav_opt[1]
52
+
53
+ return sr, data
54
+
55
+
56
+ def merge(exp_dir: str, original_audio: str, vocal: Tuple[int, np.ndarray]) -> str:
57
+ base = os.path.basename(original_audio)
58
+ base = os.path.splitext(base)[0]
59
+ music = os.path.join("separated", "htdemucs", base, "no-vocals.wav")
60
+
61
+ tmp = os.path.join(exp_dir, "tmp.wav")
62
+ sf.write(tmp, vocal[1], vocal[0])
63
+
64
+ os.system(
65
+ f"ffmpeg -i {music} -i {tmp} -filter_complex '[1]volume=2[a];[0][a]amix=inputs=2:duration=first:dropout_transition=2' {tmp}.merged.mp3"
66
+ )
67
+
68
+ return f"{tmp}.merged.mp3"
69
+
70
+
71
+ class InferenceTab:
72
+ def __init__(self):
73
+ pass
74
+
75
+ def ui(self):
76
+ gr.Markdown("# Inference")
77
+ gr.Markdown(
78
+ "After trained model is pruned, you can use it to infer on new music. \n"
79
+ "Upload the original audio and adjust the F0 add value to generate the inferred audio."
80
+ )
81
+
82
+ with gr.Row():
83
+ self.original_audio = gr.Audio(
84
+ label="Upload original audio",
85
+ type="filepath",
86
+ show_download_button=True,
87
+ )
88
+ self.f0add = gr.Slider(
89
+ label="F0 add",
90
+ minimum=-16,
91
+ maximum=16,
92
+ step=1,
93
+ value=0,
94
+ )
95
+ self.infer_btn = gr.Button(value="Infer", variant="primary")
96
+ with gr.Row():
97
+ self.infer_output = gr.Audio(label="Inferred audio")
98
+ with gr.Row():
99
+ self.merge_output = gr.Audio(label="Merged audio")
100
+
101
+ def build(self, exp_dir: gr.Textbox):
102
+ self.infer_btn.click(
103
+ fn=infer,
104
+ inputs=[exp_dir, self.original_audio, self.f0add],
105
+ outputs=[self.infer_output],
106
+ ).success(
107
+ fn=merge,
108
+ inputs=[exp_dir, self.original_audio, self.infer_output],
109
+ outputs=[self.merge_output],
110
+ )
app/setup.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import gradio as gr
4
+ import zipfile
5
+ import tempfile
6
+ from infer.modules.train.preprocess import PreProcess
7
+ from typing import Tuple
8
+
9
+
10
+ def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
11
+ with zipfile.ZipFile(zip_file, "r") as zip_ref:
12
+ zip_ref.extractall(target_dir)
13
+
14
+ audio_files = [
15
+ os.path.join(target_dir, f)
16
+ for f in os.listdir(target_dir)
17
+ if f.endswith((".wav", ".mp3", ".ogg"))
18
+ ]
19
+ if not audio_files:
20
+ raise gr.Error("No audio files found at the top level of the zip file")
21
+
22
+ return audio_files
23
+
24
+
25
+ def create_new_expdir(zip_file: str) -> Tuple[str, str]:
26
+ temp_dir = tempfile.mkdtemp()
27
+ print(f"Using exp dir: {temp_dir}")
28
+
29
+ data_dir = os.path.join(temp_dir, "_data")
30
+ os.makedirs(data_dir)
31
+ audio_files = extract_audio_files(zip_file, data_dir)
32
+
33
+ pp = PreProcess(40000, temp_dir, 3.0, False)
34
+ pp.pipeline_mp_inp_dir(data_dir, 4)
35
+
36
+ pp.logfile.seek(0)
37
+ log = pp.logfile.read()
38
+
39
+ return temp_dir, f"Preprocessed {len(audio_files)} audio files.\n{log}"
40
+
41
+
42
+ def restore_expdir(zip: str) -> str:
43
+ exp_dir = tempfile.mkdtemp()
44
+ shutil.unpack_archive(zip, exp_dir)
45
+ return exp_dir
46
+
47
+
48
+ def set_dir(dir_val: str) -> str:
49
+ if not dir_val.startswith("/tmp/"):
50
+ dir_val = os.path.join("/tmp", dir_val)
51
+ if not os.path.isdir(dir_val):
52
+ raise gr.Error("Directory does not exist")
53
+
54
+ return dir_val
55
+
56
+
57
+ class SetupTab:
58
+ def __init__(self):
59
+ pass
60
+
61
+ def ui(self):
62
+ gr.Markdown("# Setup Experiment")
63
+ gr.Markdown(
64
+ "You can upload a zip file containing audio files to start a new experiment, or upload an experiment directory zip file to restore an existing experiment."
65
+ )
66
+
67
+ with gr.Row():
68
+ with gr.Column():
69
+ self.zip_file = gr.File(
70
+ label="Upload a zip file containing audio files for training",
71
+ file_types=["zip"],
72
+ )
73
+ self.preprocess_log = gr.Textbox(label="Log", lines=5)
74
+
75
+ self.preprocess_btn = gr.Button(
76
+ value="Start New Experiment", variant="primary"
77
+ )
78
+
79
+ with gr.Row():
80
+ self.restore_zip_file = gr.File(
81
+ label="Upload the experiment directory zip file",
82
+ file_types=["zip"],
83
+ )
84
+ self.restore_btn = gr.Button(value="Restore Experiment", variant="primary")
85
+
86
+ with gr.Row():
87
+ self.dir_val = gr.Textbox(
88
+ label="Manually set the experiment directory (don't touch it unless you know what you are doing)",
89
+ placeholder="/tmp/...",
90
+ )
91
+ self.set_dir_btn = gr.Button(value="Set Directory")
92
+
93
+ def build(self, exp_dir: gr.Textbox):
94
+ self.preprocess_btn.click(
95
+ fn=create_new_expdir,
96
+ inputs=[self.zip_file],
97
+ outputs=[exp_dir, self.preprocess_log],
98
+ )
99
+
100
+ self.restore_btn.click(
101
+ fn=restore_expdir,
102
+ inputs=[self.restore_zip_file],
103
+ outputs=[exp_dir],
104
+ )
105
+
106
+ self.set_dir_btn.click(
107
+ fn=set_dir,
108
+ inputs=[self.dir_val],
109
+ outputs=[exp_dir],
110
+ )
app/train.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import traceback
4
+ import faiss
5
+ import gradio as gr
6
+ import numpy as np
7
+ from sklearn.cluster import MiniBatchKMeans
8
+ from random import shuffle
9
+ from glob import glob
10
+ from infer.modules.train.train import train
11
+ from zero import zero
12
+
13
+
14
+ def write_filelist(exp_dir: str) -> None:
15
+ if_f0_3 = True
16
+ spk_id5 = 0
17
+ gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
18
+ feature_dir = "%s/3_feature768" % (exp_dir)
19
+
20
+ if if_f0_3:
21
+ f0_dir = "%s/2a_f0" % (exp_dir)
22
+ f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
23
+ names = (
24
+ set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
25
+ & set([name.split(".")[0] for name in os.listdir(feature_dir)])
26
+ & set([name.split(".")[0] for name in os.listdir(f0_dir)])
27
+ & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
28
+ )
29
+ else:
30
+ names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
31
+ [name.split(".")[0] for name in os.listdir(feature_dir)]
32
+ )
33
+ opt = []
34
+ for name in names:
35
+ if if_f0_3:
36
+ opt.append(
37
+ "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
38
+ % (
39
+ gt_wavs_dir.replace("\\", "\\\\"),
40
+ name,
41
+ feature_dir.replace("\\", "\\\\"),
42
+ name,
43
+ f0_dir.replace("\\", "\\\\"),
44
+ name,
45
+ f0nsf_dir.replace("\\", "\\\\"),
46
+ name,
47
+ spk_id5,
48
+ )
49
+ )
50
+ else:
51
+ opt.append(
52
+ "%s/%s.wav|%s/%s.npy|%s"
53
+ % (
54
+ gt_wavs_dir.replace("\\", "\\\\"),
55
+ name,
56
+ feature_dir.replace("\\", "\\\\"),
57
+ name,
58
+ spk_id5,
59
+ )
60
+ )
61
+ fea_dim = 768
62
+
63
+ now_dir = os.getcwd()
64
+ sr2 = "40k"
65
+ if if_f0_3:
66
+ for _ in range(2):
67
+ opt.append(
68
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
69
+ % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
70
+ )
71
+ else:
72
+ for _ in range(2):
73
+ opt.append(
74
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
75
+ % (now_dir, sr2, now_dir, fea_dim, spk_id5)
76
+ )
77
+ shuffle(opt)
78
+ with open("%s/filelist.txt" % exp_dir, "w") as f:
79
+ f.write("\n".join(opt))
80
+
81
+
82
+ @zero(duration=300)
83
+ def train_model(exp_dir: str) -> str:
84
+ shutil.copy("config.json", exp_dir)
85
+ write_filelist(exp_dir)
86
+ train(exp_dir)
87
+
88
+ models = glob(f"{exp_dir}/G_*.pth")
89
+ print(models)
90
+ if not models:
91
+ raise gr.Error("No model found")
92
+
93
+ latest_model = max(models, key=os.path.getctime)
94
+ return latest_model
95
+
96
+
97
+ def train_index(exp_dir: str) -> str:
98
+ feature_dir = "%s/3_feature768" % (exp_dir)
99
+ if not os.path.exists(feature_dir):
100
+ raise gr.Error("Please extract features first.")
101
+ listdir_res = list(os.listdir(feature_dir))
102
+ if len(listdir_res) == 0:
103
+ raise gr.Error("Please extract features first.")
104
+ npys = []
105
+ for name in sorted(listdir_res):
106
+ phone = np.load("%s/%s" % (feature_dir, name))
107
+ npys.append(phone)
108
+ big_npy = np.concatenate(npys, 0)
109
+ big_npy_idx = np.arange(big_npy.shape[0])
110
+ np.random.shuffle(big_npy_idx)
111
+ big_npy = big_npy[big_npy_idx]
112
+ if big_npy.shape[0] > 2e5:
113
+ print("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
114
+ try:
115
+ big_npy = (
116
+ MiniBatchKMeans(
117
+ n_clusters=10000,
118
+ verbose=True,
119
+ batch_size=256 * 8,
120
+ compute_labels=False,
121
+ init="random",
122
+ )
123
+ .fit(big_npy)
124
+ .cluster_centers_
125
+ )
126
+ except:
127
+ info = traceback.format_exc()
128
+ print(info)
129
+ raise gr.Error(info)
130
+
131
+ np.save("%s/total_fea.npy" % exp_dir, big_npy)
132
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
133
+ print("%s,%s" % (big_npy.shape, n_ivf))
134
+ index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf)
135
+ # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
136
+ print("training")
137
+ index_ivf = faiss.extract_index_ivf(index) #
138
+ index_ivf.nprobe = 1
139
+ index.train(big_npy)
140
+ faiss.write_index(
141
+ index,
142
+ "%s/trained_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
143
+ )
144
+ print("adding")
145
+ batch_size_add = 8192
146
+ for i in range(0, big_npy.shape[0], batch_size_add):
147
+ index.add(big_npy[i : i + batch_size_add])
148
+ faiss.write_index(
149
+ index,
150
+ "%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
151
+ )
152
+ print("built added_IVF%s_Flat_nprobe_%s.index" % (n_ivf, index_ivf.nprobe))
153
+
154
+ return "%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe)
155
+
156
+
157
+ class TrainTab:
158
+ def __init__(self):
159
+ pass
160
+
161
+ def ui(self):
162
+ gr.Markdown("# Training")
163
+ gr.Markdown(
164
+ "You can start training the model by clicking the button below. "
165
+ "Each time you click the button, the model will train for 20 epochs, which takes about 10 minutes on ZeroGPU (A100). "
166
+ "Tha latest *training checkpoint* will be avaible below."
167
+ )
168
+
169
+ with gr.Row():
170
+ self.train_btn = gr.Button(value="Train", variant="primary")
171
+ self.latest_checkpoint = gr.File(label="Latest checkpoint")
172
+ with gr.Row():
173
+ self.train_index_btn = gr.Button(value="Train index", variant="primary")
174
+ self.trained_index = gr.File(label="Trained index")
175
+
176
+ def build(self, exp_dir: gr.Textbox):
177
+ self.train_btn.click(
178
+ fn=train_model,
179
+ inputs=[exp_dir],
180
+ outputs=[self.latest_checkpoint],
181
+ ).success(
182
+ fn=train_model,
183
+ inputs=[exp_dir],
184
+ outputs=[self.latest_checkpoint],
185
+ )
186
+
187
+ self.train_index_btn.click(
188
+ fn=train_index,
189
+ inputs=[exp_dir],
190
+ outputs=[self.trained_index],
191
+ )
configs/config.py CHANGED
@@ -132,7 +132,7 @@ class Config:
132
  if self.has_xpu():
133
  self.device = self.instead = "xpu:0"
134
  self.is_half = True
135
- i_device = int(self.device.split(":")[-1])
136
  self.gpu_name = torch.cuda.get_device_name(i_device)
137
  if (
138
  ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
 
132
  if self.has_xpu():
133
  self.device = self.instead = "xpu:0"
134
  self.is_half = True
135
+ i_device = int(0)
136
  self.gpu_name = torch.cuda.get_device_name(i_device)
137
  if (
138
  ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
infer/modules/vc/modules.py CHANGED
@@ -129,16 +129,16 @@ class VC:
129
 
130
  self.pipeline = Pipeline(self.tgt_sr, self.config)
131
  n_spk = self.cpt["config"][-3]
132
- index = {"value": get_index_path_from_model(sid), "__type__": "update"}
133
- logger.info("Select index: " + index["value"])
134
 
135
  return (
136
  (
137
  {"visible": True, "maximum": n_spk, "__type__": "update"},
138
  to_return_protect0,
139
  to_return_protect1,
140
- index,
141
- index,
142
  )
143
  if to_return_protect
144
  else {"visible": True, "maximum": n_spk, "__type__": "update"}
 
129
 
130
  self.pipeline = Pipeline(self.tgt_sr, self.config)
131
  n_spk = self.cpt["config"][-3]
132
+ # index = {"value": get_index_path_from_model(sid), "__type__": "update"}
133
+ # logger.info("Select index: " + index["value"])
134
 
135
  return (
136
  (
137
  {"visible": True, "maximum": n_spk, "__type__": "update"},
138
  to_return_protect0,
139
  to_return_protect1,
140
+ # index,
141
+ # index,
142
  )
143
  if to_return_protect
144
  else {"visible": True, "maximum": n_spk, "__type__": "update"}