Eddycrack864 committed on
Commit
b0294f6
·
verified ·
1 Parent(s): 7fdfd3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +546 -545
app.py CHANGED
@@ -1,546 +1,547 @@
1
- import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write
5
- import gradio as gr
6
-
7
- roformer_models = {
8
- 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
- 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
- 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
- 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
- }
13
-
14
- mdx23c_models = [
15
- 'MDX23C_D1581.ckpt',
16
- 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
- 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
- ]
19
-
20
- mdxnet_models = [
21
- 'UVR-MDX-NET-Inst_full_292.onnx',
22
- 'UVR-MDX-NET_Inst_187_beta.onnx',
23
- 'UVR-MDX-NET_Inst_82_beta.onnx',
24
- 'UVR-MDX-NET_Inst_90_beta.onnx',
25
- 'UVR-MDX-NET_Main_340.onnx',
26
- 'UVR-MDX-NET_Main_390.onnx',
27
- 'UVR-MDX-NET_Main_406.onnx',
28
- 'UVR-MDX-NET_Main_427.onnx',
29
- 'UVR-MDX-NET_Main_438.onnx',
30
- 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
- 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
- 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
- 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
- 'UVR_MDXNET_Main.onnx',
35
- 'UVR-MDX-NET-Inst_Main.onnx',
36
- 'UVR_MDXNET_1_9703.onnx',
37
- 'UVR_MDXNET_2_9682.onnx',
38
- 'UVR_MDXNET_3_9662.onnx',
39
- 'UVR-MDX-NET-Inst_1.onnx',
40
- 'UVR-MDX-NET-Inst_2.onnx',
41
- 'UVR-MDX-NET-Inst_3.onnx',
42
- 'UVR_MDXNET_KARA.onnx',
43
- 'UVR_MDXNET_KARA_2.onnx',
44
- 'UVR_MDXNET_9482.onnx',
45
- 'UVR-MDX-NET-Voc_FT.onnx',
46
- 'Kim_Vocal_1.onnx',
47
- 'Kim_Vocal_2.onnx',
48
- 'Kim_Inst.onnx',
49
- 'Reverb_HQ_By_FoxJoy.onnx',
50
- 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
- 'kuielab_a_vocals.onnx',
52
- 'kuielab_a_other.onnx',
53
- 'kuielab_a_bass.onnx',
54
- 'kuielab_a_drums.onnx',
55
- 'kuielab_b_vocals.onnx',
56
- 'kuielab_b_other.onnx',
57
- 'kuielab_b_bass.onnx',
58
- 'kuielab_b_drums.onnx',
59
- ]
60
-
61
- vrarch_models = [
62
- '1_HP-UVR.pth',
63
- '2_HP-UVR.pth',
64
- '3_HP-Vocal-UVR.pth',
65
- '4_HP-Vocal-UVR.pth',
66
- '5_HP-Karaoke-UVR.pth',
67
- '6_HP-Karaoke-UVR.pth',
68
- '7_HP2-UVR.pth',
69
- '8_HP2-UVR.pth',
70
- '9_HP2-UVR.pth',
71
- '10_SP-UVR-2B-32000-1.pth',
72
- '11_SP-UVR-2B-32000-2.pth',
73
- '12_SP-UVR-3B-44100.pth',
74
- '13_SP-UVR-4B-44100-1.pth',
75
- '14_SP-UVR-4B-44100-2.pth',
76
- '15_SP-UVR-MID-44100-1.pth',
77
- '16_SP-UVR-MID-44100-2.pth',
78
- '17_HP-Wind_Inst-UVR.pth',
79
- 'UVR-De-Echo-Aggressive.pth',
80
- 'UVR-De-Echo-Normal.pth',
81
- 'UVR-DeEcho-DeReverb.pth',
82
- 'UVR-DeNoise-Lite.pth',
83
- 'UVR-DeNoise.pth',
84
- 'UVR-BVE-4B_SN-44100-1.pth',
85
- 'MGM_HIGHEND_v4.pth',
86
- 'MGM_LOWEND_A_v4.pth',
87
- 'MGM_LOWEND_B_v4.pth',
88
- 'MGM_MAIN_v4.pth',
89
- ]
90
-
91
- demucs_models = [
92
- 'htdemucs_ft.yaml',
93
- 'htdemucs.yaml',
94
- 'hdemucs_mmi.yaml',
95
- ]
96
-
97
- output_format = [
98
- 'wav',
99
- 'flac',
100
- 'mp3',
101
- ]
102
-
103
- mdxnet_overlap_values = [
104
- '0.25',
105
- '0.5',
106
- '0.75',
107
- '0.99',
108
- ]
109
-
110
- vrarch_window_size_values = [
111
- '320',
112
- '512',
113
- '1024',
114
- ]
115
-
116
- demucs_overlap_values = [
117
- '0.25',
118
- '0.50',
119
- '0.75',
120
- '0.99',
121
- ]
122
-
123
- def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
124
- files_list = []
125
- files_list.clear()
126
- directory = "./outputs"
127
- random_id = str(random.randint(10000, 99999))
128
- pattern = f"{random_id}"
129
- os.makedirs("outputs", exist_ok=True)
130
- write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
131
- full_roformer_model = roformer_models[roformer_model]
132
- prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
133
- os.system(prompt)
134
-
135
- for file in os.listdir(directory):
136
- if re.search(pattern, file):
137
- files_list.append(os.path.join(directory, file))
138
-
139
- stem1_file = files_list[0]
140
- stem2_file = files_list[1]
141
-
142
- return stem1_file, stem2_file
143
-
144
- def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
145
- files_list = []
146
- files_list.clear()
147
- directory = "./outputs"
148
- random_id = str(random.randint(10000, 99999))
149
- pattern = f"{random_id}"
150
- os.makedirs("outputs", exist_ok=True)
151
- write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
152
- prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
153
- os.system(prompt)
154
-
155
- for file in os.listdir(directory):
156
- if re.search(pattern, file):
157
- files_list.append(os.path.join(directory, file))
158
-
159
- stem1_file = files_list[0]
160
- stem2_file = files_list[1]
161
-
162
- return stem1_file, stem2_file
163
-
164
- def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
165
- files_list = []
166
- files_list.clear()
167
- directory = "./outputs"
168
- random_id = str(random.randint(10000, 99999))
169
- pattern = f"{random_id}"
170
- os.makedirs("outputs", exist_ok=True)
171
- write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
172
- prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdx_segment_size={mdxnet_segment_size} --mdx_overlap={mdxnet_overlap}"
173
-
174
- if mdxnet_denoise:
175
- prompt += " --mdx_enable_denoise"
176
-
177
- os.system(prompt)
178
-
179
- for file in os.listdir(directory):
180
- if re.search(pattern, file):
181
- files_list.append(os.path.join(directory, file))
182
-
183
- stem1_file = files_list[0]
184
- stem2_file = files_list[1]
185
-
186
- return stem1_file, stem2_file
187
-
188
- def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
189
- files_list = []
190
- files_list.clear()
191
- directory = "./outputs"
192
- random_id = str(random.randint(10000, 99999))
193
- pattern = f"{random_id}"
194
- os.makedirs("outputs", exist_ok=True)
195
- write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
196
- prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
197
-
198
- if vrarch_tta:
199
- prompt += " --vr_enable_tta"
200
- if vrarch_high_end_process:
201
- prompt += " --vr_high_end_process"
202
-
203
- os.system(prompt)
204
-
205
- for file in os.listdir(directory):
206
- if re.search(pattern, file):
207
- files_list.append(os.path.join(directory, file))
208
-
209
- stem1_file = files_list[0]
210
- stem2_file = files_list[1]
211
-
212
- return stem1_file, stem2_file
213
-
214
- def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
215
- files_list = []
216
- files_list.clear()
217
- directory = "./outputs"
218
- random_id = str(random.randint(10000, 99999))
219
- pattern = f"{random_id}"
220
- os.makedirs("outputs", exist_ok=True)
221
- write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
222
- prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
223
-
224
- os.system(prompt)
225
-
226
- for file in os.listdir(directory):
227
- if re.search(pattern, file):
228
- files_list.append(os.path.join(directory, file))
229
-
230
- stem1_file = files_list[0]
231
- stem2_file = files_list[1]
232
- stem3_file = files_list[2]
233
- stem4_file = files_list[3]
234
-
235
- return stem1_file, stem2_file, stem3_file, stem4_file
236
-
237
- with gr.Blocks(theme="NoCrypt/miku@1.2.2", title="🎵 UVR5 UI 🎵") as app:
238
- gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
239
- gr.Markdown("If you liked this HF Space you can give me a ❤️")
240
- with gr.Tabs():
241
- with gr.TabItem("BS/Mel Roformer"):
242
- with gr.Row():
243
- roformer_model = gr.Dropdown(
244
- label = "Select the Model",
245
- choices=list(roformer_models.keys()),
246
- interactive = True
247
- )
248
- roformer_output_format = gr.Dropdown(
249
- label = "Select the Output Format",
250
- choices = output_format,
251
- interactive = True
252
- )
253
- with gr.Row():
254
- roformer_overlap = gr.Slider(
255
- minimum = 2,
256
- maximum = 4,
257
- step = 1,
258
- label = "Overlap",
259
- info = "Amount of overlap between prediction windows.",
260
- value = 4,
261
- interactive = True
262
- )
263
- with gr.Row():
264
- roformer_audio = gr.Audio(
265
- label = "Input Audio",
266
- type = "numpy",
267
- interactive = True
268
- )
269
- with gr.Row():
270
- roformer_button = gr.Button("Separate!", variant = "primary")
271
- with gr.Row():
272
- roformer_stem1 = gr.Audio(
273
- show_download_button = True,
274
- interactive = False,
275
- label = "Stem 1",
276
- type = "filepath"
277
- )
278
- roformer_stem2 = gr.Audio(
279
- show_download_button = True,
280
- interactive = False,
281
- label = "Stem 2",
282
- type = "filepath"
283
- )
284
-
285
- roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
286
-
287
- with gr.TabItem("MDX23C"):
288
- with gr.Row():
289
- mdx23c_model = gr.Dropdown(
290
- label = "Select the Model",
291
- choices = mdx23c_models,
292
- interactive = True
293
- )
294
- mdx23c_output_format = gr.Dropdown(
295
- label = "Select the Output Format",
296
- choices = output_format,
297
- interactive = True
298
- )
299
- with gr.Row():
300
- mdx23c_segment_size = gr.Slider(
301
- minimum = 32,
302
- maximum = 4000,
303
- step = 32,
304
- label = "Segment Size",
305
- info = "Larger consumes more resources, but may give better results.",
306
- value = 256,
307
- interactive = True
308
- )
309
- mdx23c_overlap = gr.Slider(
310
- minimum = 2,
311
- maximum = 50,
312
- step = 1,
313
- label = "Overlap",
314
- info = "Amount of overlap between prediction windows.",
315
- value = 8,
316
- interactive = True
317
- )
318
- with gr.Row():
319
- mdx23c_audio = gr.Audio(
320
- label = "Input Audio",
321
- type = "numpy",
322
- interactive = True
323
- )
324
- with gr.Row():
325
- mdx23c_button = gr.Button("Separate!", variant = "primary")
326
- with gr.Row():
327
- mdx23c_stem1 = gr.Audio(
328
- show_download_button = True,
329
- interactive = False,
330
- label = "Stem 1",
331
- type = "filepath"
332
- )
333
- mdx23c_stem2 = gr.Audio(
334
- show_download_button = True,
335
- interactive = False,
336
- label = "Stem 2",
337
- type = "filepath"
338
- )
339
-
340
- mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
341
-
342
- with gr.TabItem("MDX-NET"):
343
- with gr.Row():
344
- mdxnet_model = gr.Dropdown(
345
- label = "Select the Model",
346
- choices = mdxnet_models,
347
- interactive = True
348
- )
349
- mdxnet_output_format = gr.Dropdown(
350
- label = "Select the Output Format",
351
- choices = output_format,
352
- interactive = True
353
- )
354
- with gr.Row():
355
- mdxnet_segment_size = gr.Slider(
356
- minimum = 32,
357
- maximum = 4000,
358
- step = 32,
359
- label = "Segment Size",
360
- info = "Larger consumes more resources, but may give better results.",
361
- value = 256,
362
- interactive = True
363
- )
364
- mdxnet_overlap = gr.Dropdown(
365
- label = "Overlap",
366
- choices = mdxnet_overlap_values,
367
- value = mdxnet_overlap_values[0],
368
- interactive = True
369
- )
370
- mdxnet_denoise = gr.Checkbox(
371
- label = "Denoise",
372
- info = "Enable denoising during separation.",
373
- value = True,
374
- interactive = True
375
- )
376
- with gr.Row():
377
- mdxnet_audio = gr.Audio(
378
- label = "Input Audio",
379
- type = "numpy",
380
- interactive = True
381
- )
382
- with gr.Row():
383
- mdxnet_button = gr.Button("Separate!", variant = "primary")
384
- with gr.Row():
385
- mdxnet_stem1 = gr.Audio(
386
- show_download_button = True,
387
- interactive = False,
388
- label = "Stem 1",
389
- type = "filepath"
390
- )
391
- mdxnet_stem2 = gr.Audio(
392
- show_download_button = True,
393
- interactive = False,
394
- label = "Stem 2",
395
- type = "filepath"
396
- )
397
-
398
- mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
399
-
400
- with gr.TabItem("VR ARCH"):
401
- with gr.Row():
402
- vrarch_model = gr.Dropdown(
403
- label = "Select the Model",
404
- choices = vrarch_models,
405
- interactive = True
406
- )
407
- vrarch_output_format = gr.Dropdown(
408
- label = "Select the Output Format",
409
- choices = output_format,
410
- interactive = True
411
- )
412
- with gr.Row():
413
- vrarch_window_size = gr.Dropdown(
414
- label = "Window Size",
415
- choices = vrarch_window_size_values,
416
- value = vrarch_window_size_values[0],
417
- interactive = True
418
- )
419
- vrarch_agression = gr.Slider(
420
- minimum = 1,
421
- maximum = 50,
422
- step = 1,
423
- label = "Agression",
424
- info = "Intensity of primary stem extraction.",
425
- value = 5,
426
- interactive = True
427
- )
428
- vrarch_tta = gr.Checkbox(
429
- label = "TTA",
430
- info = "Enable Test-Time-Augmentation; slow but improves quality.",
431
- value = True,
432
- visible = True,
433
- interactive = True,
434
- )
435
- vrarch_high_end_process = gr.Checkbox(
436
- label = "High End Process",
437
- info = "Mirror the missing frequency range of the output.",
438
- value = False,
439
- visible = True,
440
- interactive = True,
441
- )
442
- with gr.Row():
443
- vrarch_audio = gr.Audio(
444
- label = "Input Audio",
445
- type = "numpy",
446
- interactive = True
447
- )
448
- with gr.Row():
449
- vrarch_button = gr.Button("Separate!", variant = "primary")
450
- with gr.Row():
451
- vrarch_stem1 = gr.Audio(
452
- show_download_button = True,
453
- interactive = False,
454
- type = "filepath",
455
- label = "Stem 1"
456
- )
457
- vrarch_stem2 = gr.Audio(
458
- show_download_button = True,
459
- interactive = False,
460
- type = "filepath",
461
- label = "Stem 2"
462
- )
463
-
464
- vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
465
-
466
- with gr.TabItem("Demucs"):
467
- with gr.Row():
468
- demucs_model = gr.Dropdown(
469
- label = "Select the Model",
470
- choices = demucs_models,
471
- interactive = True
472
- )
473
- demucs_output_format = gr.Dropdown(
474
- label = "Select the Output Format",
475
- choices = output_format,
476
- interactive = True
477
- )
478
- with gr.Row():
479
- demucs_shifts = gr.Slider(
480
- minimum = 1,
481
- maximum = 20,
482
- step = 1,
483
- label = "Shifts",
484
- info = "Number of predictions with random shifts, higher = slower but better quality.",
485
- value = 2,
486
- interactive = True
487
- )
488
- demucs_overlap = gr.Dropdown(
489
- label = "Overlap",
490
- choices = demucs_overlap_values,
491
- value = demucs_overlap_values[0],
492
- interactive = True
493
- )
494
- with gr.Row():
495
- demucs_audio = gr.Audio(
496
- label = "Input Audio",
497
- type = "numpy",
498
- interactive = True
499
- )
500
- with gr.Row():
501
- demucs_button = gr.Button("Separate!", variant = "primary")
502
- with gr.Row():
503
- demucs_stem1 = gr.Audio(
504
- show_download_button = True,
505
- interactive = False,
506
- type = "filepath",
507
- label = "Stem 1"
508
- )
509
- demucs_stem2 = gr.Audio(
510
- show_download_button = True,
511
- interactive = False,
512
- type = "filepath",
513
- label = "Stem 2"
514
- )
515
- with gr.Row():
516
- demucs_stem3 = gr.Audio(
517
- show_download_button = True,
518
- interactive = False,
519
- type = "filepath",
520
- label = "Stem 3"
521
- )
522
- demucs_stem4 = gr.Audio(
523
- show_download_button = True,
524
- interactive = False,
525
- type = "filepath",
526
- label = "Stem 4"
527
- )
528
-
529
- demucs_button.click(demucs_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
530
-
531
- with gr.TabItem("Credits"):
532
- gr.Markdown(
533
- """
534
- UVR5 UI created by **[Not Eddy (Spanish Mod)](http://discord.com/users/274566299349155851)** in **[AI HUB](https://discord.gg/aihub)** community.
535
-
536
- * python-audio-separator by [beveradb](https://github.com/beveradb).
537
- * Thanks to [Ilaria](https://github.com/TheStingerX) and [Mikus](https://github.com/cappuch) for the help with the code.
538
- * Improvements by [Blane187](https://github.com/Blane187).
539
-
540
- You can donate to the original UVR5 project here:
541
- [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
542
- """
543
- )
544
-
545
- app.queue()
 
546
  app.launch()
 
1
import os
import random
import re
import subprocess
import uuid

import gradio as gr
from scipy.io.wavfile import write
6
+
7
# Roformer checkpoints: the key is the name listed in the "BS/Mel Roformer"
# model dropdown, the value is the file name handed to audio-separator via
# --model_filename.
roformer_models = {
    "BS-Roformer-Viperx-1297.ckpt": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    "BS-Roformer-Viperx-1296.ckpt": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
    "BS-Roformer-Viperx-1053.ckpt": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
    "Mel-Roformer-Viperx-1143.ckpt": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
}
13
+
14
# Model file names offered in the "MDX23C" tab; the selected entry is passed
# unchanged to audio-separator as --model_filename.
mdx23c_models = [
    "MDX23C_D1581.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
]
19
+
20
# Model file names offered in the "MDX-NET" tab; the selected entry is passed
# unchanged to audio-separator as --model_filename.
mdxnet_models = [
    "UVR-MDX-NET-Inst_full_292.onnx",
    "UVR-MDX-NET_Inst_187_beta.onnx",
    "UVR-MDX-NET_Inst_82_beta.onnx",
    "UVR-MDX-NET_Inst_90_beta.onnx",
    "UVR-MDX-NET_Main_340.onnx",
    "UVR-MDX-NET_Main_390.onnx",
    "UVR-MDX-NET_Main_406.onnx",
    "UVR-MDX-NET_Main_427.onnx",
    "UVR-MDX-NET_Main_438.onnx",
    "UVR-MDX-NET-Inst_HQ_1.onnx",
    "UVR-MDX-NET-Inst_HQ_2.onnx",
    "UVR-MDX-NET-Inst_HQ_3.onnx",
    "UVR-MDX-NET-Inst_HQ_4.onnx",
    "UVR_MDXNET_Main.onnx",
    "UVR-MDX-NET-Inst_Main.onnx",
    "UVR_MDXNET_1_9703.onnx",
    "UVR_MDXNET_2_9682.onnx",
    "UVR_MDXNET_3_9662.onnx",
    "UVR-MDX-NET-Inst_1.onnx",
    "UVR-MDX-NET-Inst_2.onnx",
    "UVR-MDX-NET-Inst_3.onnx",
    "UVR_MDXNET_KARA.onnx",
    "UVR_MDXNET_KARA_2.onnx",
    "UVR_MDXNET_9482.onnx",
    "UVR-MDX-NET-Voc_FT.onnx",
    "Kim_Vocal_1.onnx",
    "Kim_Vocal_2.onnx",
    "Kim_Inst.onnx",
    "Reverb_HQ_By_FoxJoy.onnx",
    "UVR-MDX-NET_Crowd_HQ_1.onnx",
    "kuielab_a_vocals.onnx",
    "kuielab_a_other.onnx",
    "kuielab_a_bass.onnx",
    "kuielab_a_drums.onnx",
    "kuielab_b_vocals.onnx",
    "kuielab_b_other.onnx",
    "kuielab_b_bass.onnx",
    "kuielab_b_drums.onnx",
]
60
+
61
# Model file names offered in the "VR ARCH" tab; the selected entry is passed
# unchanged to audio-separator as --model_filename.
vrarch_models = [
    "1_HP-UVR.pth",
    "2_HP-UVR.pth",
    "3_HP-Vocal-UVR.pth",
    "4_HP-Vocal-UVR.pth",
    "5_HP-Karaoke-UVR.pth",
    "6_HP-Karaoke-UVR.pth",
    "7_HP2-UVR.pth",
    "8_HP2-UVR.pth",
    "9_HP2-UVR.pth",
    "10_SP-UVR-2B-32000-1.pth",
    "11_SP-UVR-2B-32000-2.pth",
    "12_SP-UVR-3B-44100.pth",
    "13_SP-UVR-4B-44100-1.pth",
    "14_SP-UVR-4B-44100-2.pth",
    "15_SP-UVR-MID-44100-1.pth",
    "16_SP-UVR-MID-44100-2.pth",
    "17_HP-Wind_Inst-UVR.pth",
    "UVR-De-Echo-Aggressive.pth",
    "UVR-De-Echo-Normal.pth",
    "UVR-DeEcho-DeReverb.pth",
    "UVR-DeNoise-Lite.pth",
    "UVR-DeNoise.pth",
    "UVR-BVE-4B_SN-44100-1.pth",
    "MGM_HIGHEND_v4.pth",
    "MGM_LOWEND_A_v4.pth",
    "MGM_LOWEND_B_v4.pth",
    "MGM_MAIN_v4.pth",
]
90
+
91
# Model config names offered in the "Demucs" tab; the selected entry is passed
# unchanged to audio-separator as --model_filename.
demucs_models = ["htdemucs_ft.yaml", "htdemucs.yaml", "hdemucs_mmi.yaml"]
96
+
97
# Choices for every "Select the Output Format" dropdown; the selection is
# forwarded to audio-separator as --output_format.
output_format = ["wav", "flac", "mp3"]
102
+
103
# Choices for the MDX-NET "Overlap" dropdown (forwarded as --mdx_overlap);
# the first entry is the preselected value.
mdxnet_overlap_values = ["0.25", "0.5", "0.75", "0.99"]
109
+
110
# Choices for the VR ARCH "Window Size" dropdown (forwarded as
# --vr_window_size); the first entry is the preselected value.
vrarch_window_size_values = ["320", "512", "1024"]
115
+
116
# Choices for the Demucs "Overlap" dropdown (forwarded as --demucs_overlap);
# the first entry is the preselected value.
demucs_overlap_values = ["0.25", "0.50", "0.75", "0.99"]
122
+
123
def _run_separation(audio, model_filename, output_format, extra_args, stem_count=2):
    """Run the ``audio-separator`` CLI on one clip and return its stem paths.

    Shared implementation behind every ``*_separator`` callback.

    Args:
        audio: ``(sample_rate, samples)`` pair; element 0 and element 1 are
            passed to ``scipy.io.wavfile.write`` as rate and data.
        model_filename: Value forwarded via ``--model_filename``.
        output_format: Value forwarded via ``--output_format``.
        extra_args: Iterable of additional, architecture-specific CLI
            arguments (already formatted as ``--flag=value`` or ``--flag``).
        stem_count: Number of output files the caller expects back.

    Returns:
        Tuple with ``stem_count`` paths inside ``./outputs``, sorted by file
        name so the stem order is the same on every run.

    Raises:
        ValueError: If the audio, the model or the output format is missing.
        RuntimeError: If the CLI cannot be started, exits with an error, or
            produces fewer than ``stem_count`` files.
    """
    # Validate everything before touching the filesystem.
    if audio is None:
        raise ValueError("No input audio was provided.")
    if not model_filename:
        raise ValueError("No model was selected.")
    if not output_format:
        raise ValueError("No output format was selected.")

    output_dir = "./outputs"
    os.makedirs(output_dir, exist_ok=True)

    # A uuid cannot collide with earlier jobs or with digits that occur in
    # model names (e.g. "44100"), unlike a 5-digit random number.
    job_id = uuid.uuid4().hex
    input_path = f"{job_id}.wav"
    write(input_path, audio[0], audio[1])

    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(model_filename),
        f"--output_dir={output_dir}",
        f"--output_format={output_format}",
        "--normalization=0.9",
        *[str(arg) for arg in extra_args],
    ]

    try:
        # Argument list + no shell: user-supplied values are never
        # interpreted by a shell.
        subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise RuntimeError(f"audio-separator failed: {err}") from err
    finally:
        # The temporary input file is only needed while the CLI runs.
        try:
            os.remove(input_path)
        except OSError:
            pass

    # Outputs are located the same way as before: by the job id appearing in
    # their file name.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if job_id in name
    )
    if len(stems) < stem_count:
        raise RuntimeError(
            f"Expected {stem_count} output files but found {len(stems)}."
        )
    return tuple(stems[:stem_count])


def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
    """Separate audio with a BS/Mel Roformer model.

    Args:
        roformer_audio: ``(sample_rate, samples)`` input clip.
        roformer_model: Key of ``roformer_models`` (the dropdown label).
        roformer_output_format: Output file format.
        roformer_overlap: Value forwarded as ``--mdxc_overlap``.

    Returns:
        ``(stem1_path, stem2_path)``.

    Raises:
        ValueError: If the model is unknown or an input is missing.
        RuntimeError: If the separation fails.
    """
    full_roformer_model = roformer_models.get(roformer_model)
    if full_roformer_model is None:
        raise ValueError(f"Unknown Roformer model: {roformer_model!r}")
    return _run_separation(
        roformer_audio,
        full_roformer_model,
        roformer_output_format,
        [f"--mdxc_overlap={roformer_overlap}"],
    )


def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
    """Separate audio with an MDX23C model.

    Args:
        mdx23c_audio: ``(sample_rate, samples)`` input clip.
        mdx23c_model: Model file name.
        mdx23c_output_format: Output file format.
        mdx23c_segment_size: Value forwarded as ``--mdxc_segment_size``.
        mdx23c_overlap: Value forwarded as ``--mdxc_overlap``.

    Returns:
        ``(stem1_path, stem2_path)``.

    Raises:
        ValueError: If an input is missing.
        RuntimeError: If the separation fails.
    """
    return _run_separation(
        mdx23c_audio,
        mdx23c_model,
        mdx23c_output_format,
        [
            f"--mdxc_segment_size={mdx23c_segment_size}",
            f"--mdxc_overlap={mdx23c_overlap}",
        ],
    )


def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
    """Separate audio with an MDX-NET model.

    Args:
        mdxnet_audio: ``(sample_rate, samples)`` input clip.
        mdxnet_model: Model file name.
        mdxnet_output_format: Output file format.
        mdxnet_segment_size: Value forwarded as ``--mdx_segment_size``.
        mdxnet_overlap: Value forwarded as ``--mdx_overlap``.
        mdxnet_denoise: When truthy, ``--mdx_enable_denoise`` is added.

    Returns:
        ``(stem1_path, stem2_path)``.

    Raises:
        ValueError: If an input is missing.
        RuntimeError: If the separation fails.
    """
    extra_args = [
        f"--mdx_segment_size={mdxnet_segment_size}",
        f"--mdx_overlap={mdxnet_overlap}",
    ]
    if mdxnet_denoise:
        extra_args.append("--mdx_enable_denoise")
    return _run_separation(mdxnet_audio, mdxnet_model, mdxnet_output_format, extra_args)


def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
    """Separate audio with a VR architecture model.

    Args:
        vrarch_audio: ``(sample_rate, samples)`` input clip.
        vrarch_model: Model file name.
        vrarch_output_format: Output file format.
        vrarch_window_size: Value forwarded as ``--vr_window_size``.
        vrarch_agression: Value forwarded as ``--vr_aggression``.
        vrarch_tta: When truthy, ``--vr_enable_tta`` is added.
        vrarch_high_end_process: When truthy, ``--vr_high_end_process`` is
            added.

    Returns:
        ``(stem1_path, stem2_path)``.

    Raises:
        ValueError: If an input is missing.
        RuntimeError: If the separation fails.
    """
    extra_args = [
        f"--vr_window_size={vrarch_window_size}",
        f"--vr_aggression={vrarch_agression}",
    ]
    if vrarch_tta:
        extra_args.append("--vr_enable_tta")
    if vrarch_high_end_process:
        extra_args.append("--vr_high_end_process")
    return _run_separation(vrarch_audio, vrarch_model, vrarch_output_format, extra_args)


def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
    """Separate audio into four stems with a Demucs model.

    Args:
        demucs_audio: ``(sample_rate, samples)`` input clip.
        demucs_model: Model config name.
        demucs_output_format: Output file format.
        demucs_shifts: Value forwarded as ``--demucs_shifts``.
        demucs_overlap: Value forwarded as ``--demucs_overlap``.

    Returns:
        ``(stem1_path, stem2_path, stem3_path, stem4_path)``.

    Raises:
        ValueError: If an input is missing.
        RuntimeError: If the separation fails.
    """
    return _run_separation(
        demucs_audio,
        demucs_model,
        demucs_output_format,
        [
            f"--demucs_shifts={demucs_shifts}",
            f"--demucs_overlap={demucs_overlap}",
        ],
        stem_count=4,
    )
236
+
237
# --- Widget factories -------------------------------------------------------
# Each tab repeats the same widgets; the factories keep them identical. They
# must be called inside the gr.Blocks context so the components are attached
# to the layout block that is open at call time.

def _model_dropdown(choices):
    """Return the "Select the Model" dropdown, preselecting the first choice.

    A default is set so "Separate!" never submits an empty model.
    """
    return gr.Dropdown(
        label="Select the Model",
        choices=choices,
        value=choices[0],
        interactive=True,
    )


def _format_dropdown():
    """Return the "Select the Output Format" dropdown, preselecting the first format."""
    return gr.Dropdown(
        label="Select the Output Format",
        choices=output_format,
        value=output_format[0],
        interactive=True,
    )


def _segment_size_slider():
    """Return the "Segment Size" slider shared by the MDX23C and MDX-NET tabs."""
    return gr.Slider(
        minimum=32,
        maximum=4000,
        step=32,
        label="Segment Size",
        info="Larger consumes more resources, but may give better results.",
        value=256,
        interactive=True,
    )


def _input_audio():
    """Return the audio input; its value reaches the callbacks as a numpy pair."""
    return gr.Audio(label="Input Audio", type="numpy", interactive=True)


def _separate_button():
    """Return the primary "Separate!" button."""
    return gr.Button("Separate!", variant="primary")


def _stem_player(index):
    """Return a read-only, downloadable audio player labelled "Stem <index>"."""
    return gr.Audio(
        show_download_button=True,
        interactive=False,
        label=f"Stem {index}",
        type="filepath",
    )


# --- Application layout -----------------------------------------------------
with gr.Blocks(theme="NoCrypt/miku@1.2.2", title="🎵 UVR5 UI 🎵") as app:
    gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
    gr.Markdown("If you liked this HF Space you can give me a ❤️")
    gr.Markdown("Try UVR5 UI with GPU using Colab [here](https://colab.research.google.com/github/Eddycrack864/UVR5-UI/blob/main/UVR_UI.ipynb)")
    with gr.Tabs():
        with gr.TabItem("BS/Mel Roformer"):
            with gr.Row():
                roformer_model = _model_dropdown(list(roformer_models.keys()))
                roformer_output_format = _format_dropdown()
            with gr.Row():
                roformer_overlap = gr.Slider(
                    minimum=2,
                    maximum=4,
                    step=1,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=4,
                    interactive=True,
                )
            with gr.Row():
                roformer_audio = _input_audio()
            with gr.Row():
                roformer_button = _separate_button()
            with gr.Row():
                roformer_stem1 = _stem_player(1)
                roformer_stem2 = _stem_player(2)

            roformer_button.click(
                roformer_separator,
                [roformer_audio, roformer_model, roformer_output_format, roformer_overlap],
                [roformer_stem1, roformer_stem2],
            )

        with gr.TabItem("MDX23C"):
            with gr.Row():
                mdx23c_model = _model_dropdown(mdx23c_models)
                mdx23c_output_format = _format_dropdown()
            with gr.Row():
                mdx23c_segment_size = _segment_size_slider()
                mdx23c_overlap = gr.Slider(
                    minimum=2,
                    maximum=50,
                    step=1,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=8,
                    interactive=True,
                )
            with gr.Row():
                mdx23c_audio = _input_audio()
            with gr.Row():
                mdx23c_button = _separate_button()
            with gr.Row():
                mdx23c_stem1 = _stem_player(1)
                mdx23c_stem2 = _stem_player(2)

            mdx23c_button.click(
                mdxc_separator,
                [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap],
                [mdx23c_stem1, mdx23c_stem2],
            )

        with gr.TabItem("MDX-NET"):
            with gr.Row():
                mdxnet_model = _model_dropdown(mdxnet_models)
                mdxnet_output_format = _format_dropdown()
            with gr.Row():
                mdxnet_segment_size = _segment_size_slider()
                mdxnet_overlap = gr.Dropdown(
                    label="Overlap",
                    choices=mdxnet_overlap_values,
                    value=mdxnet_overlap_values[0],
                    interactive=True,
                )
                mdxnet_denoise = gr.Checkbox(
                    label="Denoise",
                    info="Enable denoising during separation.",
                    value=True,
                    interactive=True,
                )
            with gr.Row():
                mdxnet_audio = _input_audio()
            with gr.Row():
                mdxnet_button = _separate_button()
            with gr.Row():
                mdxnet_stem1 = _stem_player(1)
                mdxnet_stem2 = _stem_player(2)

            mdxnet_button.click(
                mdxnet_separator,
                [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise],
                [mdxnet_stem1, mdxnet_stem2],
            )

        with gr.TabItem("VR ARCH"):
            with gr.Row():
                vrarch_model = _model_dropdown(vrarch_models)
                vrarch_output_format = _format_dropdown()
            with gr.Row():
                vrarch_window_size = gr.Dropdown(
                    label="Window Size",
                    choices=vrarch_window_size_values,
                    value=vrarch_window_size_values[0],
                    interactive=True,
                )
                vrarch_agression = gr.Slider(
                    minimum=1,
                    maximum=50,
                    step=1,
                    # Label spelling corrected (was "Agression").
                    label="Aggression",
                    info="Intensity of primary stem extraction.",
                    value=5,
                    interactive=True,
                )
                vrarch_tta = gr.Checkbox(
                    label="TTA",
                    info="Enable Test-Time-Augmentation; slow but improves quality.",
                    value=True,
                    visible=True,
                    interactive=True,
                )
                vrarch_high_end_process = gr.Checkbox(
                    label="High End Process",
                    info="Mirror the missing frequency range of the output.",
                    value=False,
                    visible=True,
                    interactive=True,
                )
            with gr.Row():
                vrarch_audio = _input_audio()
            with gr.Row():
                vrarch_button = _separate_button()
            with gr.Row():
                vrarch_stem1 = _stem_player(1)
                vrarch_stem2 = _stem_player(2)

            vrarch_button.click(
                vrarch_separator,
                [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process],
                [vrarch_stem1, vrarch_stem2],
            )

        with gr.TabItem("Demucs"):
            with gr.Row():
                demucs_model = _model_dropdown(demucs_models)
                demucs_output_format = _format_dropdown()
            with gr.Row():
                demucs_shifts = gr.Slider(
                    minimum=1,
                    maximum=20,
                    step=1,
                    label="Shifts",
                    info="Number of predictions with random shifts, higher = slower but better quality.",
                    value=2,
                    interactive=True,
                )
                demucs_overlap = gr.Dropdown(
                    label="Overlap",
                    choices=demucs_overlap_values,
                    value=demucs_overlap_values[0],
                    interactive=True,
                )
            with gr.Row():
                demucs_audio = _input_audio()
            with gr.Row():
                demucs_button = _separate_button()
            with gr.Row():
                demucs_stem1 = _stem_player(1)
                demucs_stem2 = _stem_player(2)
            with gr.Row():
                demucs_stem3 = _stem_player(3)
                demucs_stem4 = _stem_player(4)

            demucs_button.click(
                demucs_separator,
                [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap],
                [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4],
            )

        with gr.TabItem("Credits"):
            gr.Markdown(
                """
                UVR5 UI created by **[Not Eddy (Spanish Mod)](http://discord.com/users/274566299349155851)** in **[AI HUB](https://discord.gg/aihub)** community.

                * python-audio-separator by [beveradb](https://github.com/beveradb).
                * Thanks to [Ilaria](https://github.com/TheStingerX) and [Mikus](https://github.com/cappuch) for the help with the code.
                * Improvements by [Blane187](https://github.com/Blane187).

                You can donate to the original UVR5 project here:
                [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
                """
            )

# Enable request queuing, then start the web server.
app.queue()
app.launch()