Ryouko65777 committed on
Commit
81f1963
·
verified ·
1 Parent(s): efd93f9

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (2).py +951 -0
  2. requirements (1).txt +4 -0
app (2).py ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import random
4
+ from scipy.io.wavfile import write
5
+ from scipy.io.wavfile import read
6
+ import numpy as np
7
+ import gradio as gr
8
+ import yt_dlp
9
+ import subprocess
10
+
11
# Dropdown label -> checkpoint file name passed to audio-separator.
roformer_models = {
    "BS-Roformer-Viperx-1297.ckpt": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    "BS-Roformer-Viperx-1296.ckpt": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
    "BS-Roformer-Viperx-1053.ckpt": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
    "Mel-Roformer-Viperx-1143.ckpt": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
}

# Model file names offered in the "MDX23C" tab.
mdx23c_models = [
    "MDX23C_D1581.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
]

# Model file names offered in the "MDX-NET" tab (order = dropdown order).
mdxnet_models = [
    # UVR instrumental / main models
    "UVR-MDX-NET-Inst_full_292.onnx",
    "UVR-MDX-NET_Inst_187_beta.onnx",
    "UVR-MDX-NET_Inst_82_beta.onnx",
    "UVR-MDX-NET_Inst_90_beta.onnx",
    "UVR-MDX-NET_Main_340.onnx",
    "UVR-MDX-NET_Main_390.onnx",
    "UVR-MDX-NET_Main_406.onnx",
    "UVR-MDX-NET_Main_427.onnx",
    "UVR-MDX-NET_Main_438.onnx",
    "UVR-MDX-NET-Inst_HQ_1.onnx",
    "UVR-MDX-NET-Inst_HQ_2.onnx",
    "UVR-MDX-NET-Inst_HQ_3.onnx",
    "UVR-MDX-NET-Inst_HQ_4.onnx",
    "UVR_MDXNET_Main.onnx",
    "UVR-MDX-NET-Inst_Main.onnx",
    "UVR_MDXNET_1_9703.onnx",
    "UVR_MDXNET_2_9682.onnx",
    "UVR_MDXNET_3_9662.onnx",
    "UVR-MDX-NET-Inst_1.onnx",
    "UVR-MDX-NET-Inst_2.onnx",
    "UVR-MDX-NET-Inst_3.onnx",
    "UVR_MDXNET_KARA.onnx",
    "UVR_MDXNET_KARA_2.onnx",
    "UVR_MDXNET_9482.onnx",
    "UVR-MDX-NET-Voc_FT.onnx",
    # Kim models
    "Kim_Vocal_1.onnx",
    "Kim_Vocal_2.onnx",
    "Kim_Inst.onnx",
    # Reverb / crowd models
    "Reverb_HQ_By_FoxJoy.onnx",
    "UVR-MDX-NET_Crowd_HQ_1.onnx",
    # kuielab per-stem models
    "kuielab_a_vocals.onnx",
    "kuielab_a_other.onnx",
    "kuielab_a_bass.onnx",
    "kuielab_a_drums.onnx",
    "kuielab_b_vocals.onnx",
    "kuielab_b_other.onnx",
    "kuielab_b_bass.onnx",
    "kuielab_b_drums.onnx",
]

# Model file names offered in the "VR ARCH" tab (order = dropdown order).
vrarch_models = [
    "1_HP-UVR.pth",
    "2_HP-UVR.pth",
    "3_HP-Vocal-UVR.pth",
    "4_HP-Vocal-UVR.pth",
    "5_HP-Karaoke-UVR.pth",
    "6_HP-Karaoke-UVR.pth",
    "7_HP2-UVR.pth",
    "8_HP2-UVR.pth",
    "9_HP2-UVR.pth",
    "10_SP-UVR-2B-32000-1.pth",
    "11_SP-UVR-2B-32000-2.pth",
    "12_SP-UVR-3B-44100.pth",
    "13_SP-UVR-4B-44100-1.pth",
    "14_SP-UVR-4B-44100-2.pth",
    "15_SP-UVR-MID-44100-1.pth",
    "16_SP-UVR-MID-44100-2.pth",
    "17_HP-Wind_Inst-UVR.pth",
    "UVR-De-Echo-Aggressive.pth",
    "UVR-De-Echo-Normal.pth",
    "UVR-DeEcho-DeReverb.pth",
    "UVR-DeNoise-Lite.pth",
    "UVR-DeNoise.pth",
    "UVR-BVE-4B_SN-44100-1.pth",
    "MGM_HIGHEND_v4.pth",
    "MGM_LOWEND_A_v4.pth",
    "MGM_LOWEND_B_v4.pth",
    "MGM_MAIN_v4.pth",
]

# Model config names offered in the "Demucs" tab.
demucs_models = ["htdemucs_ft.yaml", "htdemucs.yaml", "hdemucs_mmi.yaml"]

# Output container formats shared by every tab.
output_format = ["wav", "flac", "mp3"]

# Dropdown choices are kept as strings; they are interpolated verbatim into
# the audio-separator command line.
mdxnet_overlap_values = ["0.25", "0.5", "0.75", "0.99"]

vrarch_window_size_values = ["320", "512", "1024"]

demucs_overlap_values = ["0.25", "0.50", "0.75", "0.99"]
126
+
127
def download_audio(url):
    """Download the audio behind *url* with yt-dlp and return it for gr.Audio.

    The media is fetched into the ``ytdl/`` folder, converted to WAV by the
    ``FFmpegExtractAudio`` post-processor and then read back from disk.

    Args:
        url: Link to a video/audio page supported by yt-dlp.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is an int16
        numpy array, i.e. the value format of a ``type="numpy"`` audio input.

    Raises:
        gr.Error: If *url* is empty or blank.
    """
    cleaned_url = (url or "").strip()
    if not cleaned_url:
        # Fail early with a readable message instead of a yt-dlp traceback.
        raise gr.Error("Please paste a link before pressing Download.")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        # Only one file is read back below, so never expand a video URL
        # that also carries a playlist id into the whole playlist.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(cleaned_url, download=True)
        downloaded_path = ydl.prepare_filename(info_dict)

    # prepare_filename() reports the pre-conversion extension; the
    # post-processor leaves a .wav with the same stem next to it.
    wav_path = os.path.splitext(downloaded_path)[0] + '.wav'
    sample_rate, audio_data = read(wav_path)

    return sample_rate, np.asarray(audio_data, dtype=np.int16)
145
+
146
def _run_single_separation(audio, model_filename, output_format, extra_args, stem_count):
    """Run audio-separator on one in-memory clip and return its stem paths.

    Shared implementation of the five ``*_separator`` callbacks.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by a
            ``type="numpy"`` audio component, or ``None`` when nothing was
            uploaded.
        model_filename: Model file name handed to ``--model_filename``.
        output_format: Value for ``--output_format``.
        extra_args: Architecture-specific command-line flags (list of str).
        stem_count: Number of output files the caller expects.

    Returns:
        A tuple with ``stem_count`` paths inside ``./outputs``, sorted by
        file name so the order is stable between runs.

    Raises:
        gr.Error: If the input or model is missing, the tool cannot be
            started, it exits with a non-zero status, or it produced fewer
            files than expected.
    """
    if audio is None:
        raise gr.Error("Please provide an input audio first.")
    if not model_filename:
        raise gr.Error("Please select a model first.")

    sample_rate, samples = audio
    output_dir = "./outputs"
    os.makedirs(output_dir, exist_ok=True)

    # A 16-hex-digit token instead of a 5-digit number: short numeric ids
    # such as "44100" also occur inside model names and therefore matched
    # stale outputs of earlier runs.
    token = os.urandom(8).hex()
    input_path = f"{token}.wav"

    # Argument list (no shell): values coming from the UI can never be
    # interpreted as shell syntax.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(model_filename),
        f"--output_dir={output_dir}",
        f"--output_format={output_format}",
        "--normalization=0.9",
        *extra_args,
    ]

    try:
        write(input_path, sample_rate, samples)
        completed = subprocess.run(command)
    except OSError as exc:
        raise gr.Error(f"Could not run audio-separator: {exc}")
    finally:
        # The temporary input is only needed while the tool runs.
        if os.path.exists(input_path):
            os.remove(input_path)

    if completed.returncode != 0:
        raise gr.Error(f"audio-separator failed (exit code {completed.returncode}).")

    # Outputs are recognised by the input's base name, as in the original
    # lookup, but anchored at the start of the file name.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if name.startswith(token)
    )
    if len(stems) < stem_count:
        raise gr.Error(
            f"Expected {stem_count} output files but found {len(stems)}."
        )

    return tuple(stems[:stem_count])


def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap, roformer_segment_size):
    """Separate one clip with a BS/Mel Roformer model.

    ``roformer_model`` is the dropdown label; it is translated to the real
    checkpoint name through ``roformer_models``.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.
    """
    extra_args = [
        f"--mdxc_overlap={roformer_overlap}",
        f"--mdxc_segment_size={roformer_segment_size}",
    ]
    return _run_single_separation(
        roformer_audio,
        roformer_models.get(roformer_model),
        roformer_output_format,
        extra_args,
        2,
    )


def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap, mdx23c_denoise):
    """Separate one clip with an MDX23C model.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.
    """
    extra_args = [
        f"--mdxc_segment_size={mdx23c_segment_size}",
        f"--mdxc_overlap={mdx23c_overlap}",
    ]
    if mdx23c_denoise:
        # Same flag the original passed for this tab.
        extra_args.append("--mdx_enable_denoise")
    return _run_single_separation(
        mdx23c_audio, mdx23c_model, mdx23c_output_format, extra_args, 2
    )


def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
    """Separate one clip with an MDX-NET model.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.
    """
    extra_args = [
        f"--mdx_segment_size={mdxnet_segment_size}",
        f"--mdx_overlap={mdxnet_overlap}",
    ]
    if mdxnet_denoise:
        extra_args.append("--mdx_enable_denoise")
    return _run_single_separation(
        mdxnet_audio, mdxnet_model, mdxnet_output_format, extra_args, 2
    )


def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
    """Separate one clip with a VR architecture model.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.
    """
    extra_args = [
        f"--vr_window_size={vrarch_window_size}",
        f"--vr_aggression={vrarch_agression}",
    ]
    if vrarch_tta:
        extra_args.append("--vr_enable_tta")
    if vrarch_high_end_process:
        extra_args.append("--vr_high_end_process")
    return _run_single_separation(
        vrarch_audio, vrarch_model, vrarch_output_format, extra_args, 2
    )


def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
    """Separate one clip with a Demucs model.

    Returns:
        ``(stem1_file, stem2_file, stem3_file, stem4_file)`` paths inside
        ``./outputs``.
    """
    extra_args = [
        f"--demucs_shifts={demucs_shifts}",
        f"--demucs_overlap={demucs_overlap}",
    ]
    return _run_single_separation(
        demucs_audio, demucs_model, demucs_output_format, extra_args, 4
    )
263
+
264
# File extensions (lower case) that the batch tabs pick up.
_BATCH_AUDIO_EXTENSIONS = (".mp3", ".wav", ".flac")


def _run_batch(path_input, path_output, model_filename, output_format, extra_args):
    """Run audio-separator over every audio file of a folder, yielding logs.

    Shared implementation of the five ``*_batch`` generators.

    Args:
        path_input: Folder that is scanned (non-recursively) for audio files.
        path_output: Folder handed to ``--output_dir``.
        model_filename: Model file name handed to ``-m``.
        output_format: Value for ``--output_format``.
        extra_args: Architecture-specific command-line flags (list of str).

    Yields:
        The accumulated log as one newline-joined string, once before and
        once after each file, so the UI textbox updates progressively.
    """
    logs = []

    if not model_filename:
        yield "Please select a model."
        return

    if not path_input or not os.path.isdir(path_input):
        # Report the problem in the log box instead of raising from listdir.
        yield f"Input path not found: {path_input}"
        return

    # Case-insensitive match so "SONG.WAV" / "track.Mp3" are not skipped.
    found_files = sorted(
        name
        for name in os.listdir(path_input)
        if name.lower().endswith(_BATCH_AUDIO_EXTENSIONS)
    )

    if not found_files:
        logs.append("No valid audio files.")
        yield "\n".join(logs)
        return

    logs.append(f"{len(found_files)} audio files found")

    for name in found_files:
        command = [
            "audio-separator",
            os.path.join(path_input, name),
            "-m",
            f"{model_filename}",
            f"--output_dir={path_output}",
            f"--output_format={output_format}",
            "--normalization=0.9",
            *extra_args,
        ]

        logs.append(f"Processing file: {name}")
        yield "\n".join(logs)

        try:
            returncode = subprocess.run(command).returncode
        except OSError as exc:
            # E.g. the audio-separator executable is not installed.
            logs.append(f"File: {name} failed: {exc}")
        else:
            if returncode == 0:
                logs.append(f"File: {name} processed!")
            else:
                # Do not claim success when the tool reported an error.
                logs.append(f"File: {name} failed (exit code {returncode})")
        yield "\n".join(logs)


def roformer_batch(path_input, path_output, model, output_format, overlap, segment_size):
    """Batch-separate a folder with a BS/Mel Roformer model.

    ``model`` is the dropdown label; it is translated to the real checkpoint
    name through ``roformer_models``. Yields the growing log text.
    """
    extra_args = [
        f"--mdxc_overlap={overlap}",
        f"--mdxc_segment_size={segment_size}",
    ]
    yield from _run_batch(
        path_input, path_output, roformer_models.get(model), output_format, extra_args
    )


def mdx23c_batch(path_input, path_output, model, output_format, overlap, segment_size, denoise):
    """Batch-separate a folder with an MDX23C model; yields the growing log text."""
    extra_args = [
        f"--mdxc_overlap={overlap}",
        f"--mdxc_segment_size={segment_size}",
    ]
    if denoise:
        extra_args.append("--mdx_enable_denoise")
    yield from _run_batch(path_input, path_output, model, output_format, extra_args)


def mdxnet_batch(path_input, path_output, model, output_format, overlap, segment_size, denoise):
    """Batch-separate a folder with an MDX-NET model; yields the growing log text."""
    extra_args = [
        f"--mdx_overlap={overlap}",
        f"--mdx_segment_size={segment_size}",
    ]
    if denoise:
        extra_args.append("--mdx_enable_denoise")
    yield from _run_batch(path_input, path_output, model, output_format, extra_args)


def vrarch_batch(path_input, path_output, model, output_format, window_size, agression, tta, high_end_process):
    """Batch-separate a folder with a VR architecture model; yields the growing log text."""
    extra_args = [
        f"--vr_window_size={window_size}",
        f"--vr_aggression={agression}",
    ]
    if tta:
        extra_args.append("--vr_enable_tta")
    if high_end_process:
        extra_args.append("--vr_high_end_process")
    yield from _run_batch(path_input, path_output, model, output_format, extra_args)


def demucs_batch(path_input, path_output, model, output_format, shifts, overlap):
    """Batch-separate a folder with a Demucs model; yields the growing log text."""
    extra_args = [
        f"--demucs_shifts={shifts}",
        f"--demucs_overlap={overlap}",
    ]
    yield from _run_batch(path_input, path_output, model, output_format, extra_args)
420
+
421
+
422
+ css = """
423
+ .title { font-size: 3em; align-items: center; text-align: center; }
424
+ .info { align-items: center; text-align: center; }
425
+ .block.result { margin: 1em 0; padding: 1em; box-shadow: 0 0 3px 3px #664422, 0 0 3px 2px #664422 inset; border-radius: 6px; background: #665544; }
426
+ """
427
+
428
+
429
+ with gr.Blocks(theme="NoCrypt/miku@1.2.2", fill_width=True, css=css, title="🎵 UVR5 UI 🎵") as app:
430
+ gr.Markdown("<h1><center> 🎵 UVR5 UI 🎵 </center></h1>")
431
+ gr.Markdown("If you liked this HF Space you can give me a ❤️")
432
+ gr.Markdown("Try UVR5 UI using Colab [here](https://colab.research.google.com/github/Eddycrack864/UVR5-UI/blob/main/UVR_UI.ipynb)")
433
+ with gr.Tabs():
434
+ with gr.Tab("BS/Mel Roformer"):
435
+ with gr.Row():
436
+ roformer_model = gr.Dropdown(
437
+ label = "Select the Model",
438
+ choices=list(roformer_models.keys()),
439
+ interactive = True
440
+ )
441
+ roformer_output_format = gr.Dropdown(
442
+ label = "Select the Output Format",
443
+ choices = output_format,
444
+ interactive = True
445
+ )
446
+ with gr.Row():
447
+ roformer_overlap = gr.Slider(
448
+ minimum = 2,
449
+ maximum = 4,
450
+ step = 1,
451
+ label = "Overlap",
452
+ info = "Amount of overlap between prediction windows.",
453
+ value = 4,
454
+ interactive = True
455
+ )
456
+ roformer_segment_size = gr.Slider(
457
+ minimum = 32,
458
+ maximum = 4000,
459
+ step = 32,
460
+ label = "Segment Size",
461
+ info = "Larger consumes more resources, but may give better results.",
462
+ value = 256,
463
+ interactive = True
464
+ )
465
+ with gr.Row():
466
+ roformer_audio = gr.Audio(
467
+ label = "Input Audio",
468
+ type = "numpy",
469
+ interactive = True
470
+ )
471
+ with gr.Accordion("Separation by Link", open = False):
472
+ with gr.Row():
473
+ roformer_link = gr.Textbox(
474
+ label = "Link",
475
+ placeholder = "Paste the link here",
476
+ interactive = True
477
+ )
478
+ with gr.Row():
479
+ gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
480
+ with gr.Row():
481
+ roformer_download_button = gr.Button(
482
+ "Download!",
483
+ variant = "primary"
484
+ )
485
+
486
+ roformer_download_button.click(download_audio, [roformer_link], [roformer_audio])
487
+
488
+ with gr.Accordion("Batch Separation", open = False):
489
+ with gr.Row():
490
+ roformer_input_path = gr.Textbox(
491
+ label = "Input Path",
492
+ placeholder = "Place the input path here",
493
+ interactive = True
494
+ )
495
+ roformer_output_path = gr.Textbox(
496
+ label = "Output Path",
497
+ placeholder = "Place the output path here",
498
+ interactive = True
499
+ )
500
+ with gr.Row():
501
+ roformer_bath_button = gr.Button("Separate!", variant = "primary")
502
+ with gr.Row():
503
+ roformer_info = gr.Textbox(
504
+ label = "Output Information",
505
+ interactive = False
506
+ )
507
+
508
+ roformer_bath_button.click(roformer_batch, [roformer_input_path, roformer_output_path, roformer_model, roformer_output_format, roformer_overlap, roformer_segment_size], [roformer_info])
509
+
510
+ with gr.Row():
511
+ roformer_button = gr.Button("Separate!", variant = "primary")
512
+ with gr.Row():
513
+ roformer_stem1 = gr.Audio(
514
+ show_download_button = True,
515
+ interactive = False,
516
+ label = "Stem 1",
517
+ type = "filepath"
518
+ )
519
+ roformer_stem2 = gr.Audio(
520
+ show_download_button = True,
521
+ interactive = False,
522
+ label = "Stem 2",
523
+ type = "filepath"
524
+ )
525
+
526
+ roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap, roformer_segment_size], [roformer_stem1, roformer_stem2])
527
+
528
+ with gr.Tab("MDX23C"):
529
+ with gr.Row():
530
+ mdx23c_model = gr.Dropdown(
531
+ label = "Select the Model",
532
+ choices = mdx23c_models,
533
+ interactive = True
534
+ )
535
+ mdx23c_output_format = gr.Dropdown(
536
+ label = "Select the Output Format",
537
+ choices = output_format,
538
+ interactive = True
539
+ )
540
+ with gr.Row():
541
+ mdx23c_segment_size = gr.Slider(
542
+ minimum = 32,
543
+ maximum = 4000,
544
+ step = 32,
545
+ label = "Segment Size",
546
+ info = "Larger consumes more resources, but may give better results.",
547
+ value = 256,
548
+ interactive = True
549
+ )
550
+ mdx23c_overlap = gr.Slider(
551
+ minimum = 2,
552
+ maximum = 50,
553
+ step = 1,
554
+ label = "Overlap",
555
+ info = "Amount of overlap between prediction windows.",
556
+ value = 8,
557
+ interactive = True
558
+ )
559
+ mdx23c_denoise = gr.Checkbox(
560
+ label = "Denoise",
561
+ info = "Enable denoising during separation.",
562
+ value = False,
563
+ interactive = True
564
+ )
565
+ with gr.Row():
566
+ mdx23c_audio = gr.Audio(
567
+ label = "Input Audio",
568
+ type = "numpy",
569
+ interactive = True
570
+ )
571
+ with gr.Accordion("Separation by Link", open = False):
572
+ with gr.Row():
573
+ mdx23c_link = gr.Textbox(
574
+ label = "Link",
575
+ placeholder = "Paste the link here",
576
+ interactive = True
577
+ )
578
+ with gr.Row():
579
+ gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
580
+ with gr.Row():
581
+ mdx23c_download_button = gr.Button(
582
+ "Download!",
583
+ variant = "primary"
584
+ )
585
+
586
+ mdx23c_download_button.click(download_audio, [mdx23c_link], [mdx23c_audio])
587
+
588
+ with gr.Accordion("Batch Separation", open = False):
589
+ with gr.Row():
590
+ mdx23c_input_path = gr.Textbox(
591
+ label = "Input Path",
592
+ placeholder = "Place the input path here",
593
+ interactive = True
594
+ )
595
+ mdx23c_output_path = gr.Textbox(
596
+ label = "Output Path",
597
+ placeholder = "Place the output path here",
598
+ interactive = True
599
+ )
600
+ with gr.Row():
601
+ mdx23c_bath_button = gr.Button("Separate!", variant = "primary")
602
+ with gr.Row():
603
+ mdx23c_info = gr.Textbox(
604
+ label = "Output Information",
605
+ interactive = False
606
+ )
607
+
608
+ mdx23c_bath_button.click(mdx23c_batch, [mdx23c_input_path, mdx23c_output_path, mdx23c_model, mdx23c_output_format, mdx23c_overlap, mdx23c_segment_size, mdx23c_denoise], [mdx23c_info])
609
+
610
+ with gr.Row():
611
+ mdx23c_button = gr.Button("Separate!", variant = "primary")
612
+ with gr.Row():
613
+ mdx23c_stem1 = gr.Audio(
614
+ show_download_button = True,
615
+ interactive = False,
616
+ label = "Stem 1",
617
+ type = "filepath"
618
+ )
619
+ mdx23c_stem2 = gr.Audio(
620
+ show_download_button = True,
621
+ interactive = False,
622
+ label = "Stem 2",
623
+ type = "filepath"
624
+ )
625
+
626
+ mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap, mdx23c_denoise], [mdx23c_stem1, mdx23c_stem2])
627
+
628
+ with gr.Tab("MDX-NET"):
629
+ with gr.Row():
630
+ mdxnet_model = gr.Dropdown(
631
+ label = "Select the Model",
632
+ choices = mdxnet_models,
633
+ interactive = True
634
+ )
635
+ mdxnet_output_format = gr.Dropdown(
636
+ label = "Select the Output Format",
637
+ choices = output_format,
638
+ interactive = True
639
+ )
640
+ with gr.Row():
641
+ mdxnet_segment_size = gr.Slider(
642
+ minimum = 32,
643
+ maximum = 4000,
644
+ step = 32,
645
+ label = "Segment Size",
646
+ info = "Larger consumes more resources, but may give better results.",
647
+ value = 256,
648
+ interactive = True
649
+ )
650
+ mdxnet_overlap = gr.Dropdown(
651
+ label = "Overlap",
652
+ choices = mdxnet_overlap_values,
653
+ value = mdxnet_overlap_values[0],
654
+ interactive = True
655
+ )
656
+ mdxnet_denoise = gr.Checkbox(
657
+ label = "Denoise",
658
+ info = "Enable denoising during separation.",
659
+ value = True,
660
+ interactive = True
661
+ )
662
+ with gr.Row():
663
+ mdxnet_audio = gr.Audio(
664
+ label = "Input Audio",
665
+ type = "numpy",
666
+ interactive = True
667
+ )
668
+ with gr.Accordion("Separation by Link", open = False):
669
+ with gr.Row():
670
+ mdxnet_link = gr.Textbox(
671
+ label = "Link",
672
+ placeholder = "Paste the link here",
673
+ interactive = True
674
+ )
675
+ with gr.Row():
676
+ gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
677
+ with gr.Row():
678
+ mdxnet_download_button = gr.Button(
679
+ "Download!",
680
+ variant = "primary"
681
+ )
682
+
683
+ mdxnet_download_button.click(download_audio, [mdxnet_link], [mdxnet_audio])
684
+
685
+ with gr.Accordion("Batch Separation", open = False):
686
+ with gr.Row():
687
+ mdxnet_input_path = gr.Textbox(
688
+ label = "Input Path",
689
+ placeholder = "Place the input path here",
690
+ interactive = True
691
+ )
692
+ mdxnet_output_path = gr.Textbox(
693
+ label = "Output Path",
694
+ placeholder = "Place the output path here",
695
+ interactive = True
696
+ )
697
+ with gr.Row():
698
+ mdxnet_bath_button = gr.Button("Separate!", variant = "primary")
699
+ with gr.Row():
700
+ mdxnet_info = gr.Textbox(
701
+ label = "Output Information",
702
+ interactive = False
703
+ )
704
+
705
+ mdxnet_bath_button.click(mdxnet_batch, [mdxnet_input_path, mdxnet_output_path, mdxnet_model, mdxnet_output_format, mdxnet_overlap, mdxnet_segment_size, mdxnet_denoise], [mdxnet_info])
706
+
707
+ with gr.Row():
708
+ mdxnet_button = gr.Button("Separate!", variant = "primary")
709
+ with gr.Row():
710
+ mdxnet_stem1 = gr.Audio(
711
+ show_download_button = True,
712
+ interactive = False,
713
+ label = "Stem 1",
714
+ type = "filepath"
715
+ )
716
+ mdxnet_stem2 = gr.Audio(
717
+ show_download_button = True,
718
+ interactive = False,
719
+ label = "Stem 2",
720
+ type = "filepath"
721
+ )
722
+
723
+ mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
724
+
725
+ with gr.TabItem("VR ARCH"):
726
+ with gr.Row():
727
+ vrarch_model = gr.Dropdown(
728
+ label = "Select the Model",
729
+ choices = vrarch_models,
730
+ interactive = True
731
+ )
732
+ vrarch_output_format = gr.Dropdown(
733
+ label = "Select the Output Format",
734
+ choices = output_format,
735
+ interactive = True
736
+ )
737
+ with gr.Row():
738
+ vrarch_window_size = gr.Dropdown(
739
+ label = "Window Size",
740
+ choices = vrarch_window_size_values,
741
+ value = vrarch_window_size_values[0],
742
+ interactive = True
743
+ )
744
+ vrarch_agression = gr.Slider(
745
+ minimum = 1,
746
+ maximum = 50,
747
+ step = 1,
748
+ label = "Agression",
749
+ info = "Intensity of primary stem extraction.",
750
+ value = 5,
751
+ interactive = True
752
+ )
753
+ vrarch_tta = gr.Checkbox(
754
+ label = "TTA",
755
+ info = "Enable Test-Time-Augmentation; slow but improves quality.",
756
+ value = True,
757
+ visible = True,
758
+ interactive = True,
759
+ )
760
+ vrarch_high_end_process = gr.Checkbox(
761
+ label = "High End Process",
762
+ info = "Mirror the missing frequency range of the output.",
763
+ value = False,
764
+ visible = True,
765
+ interactive = True,
766
+ )
767
+ with gr.Row():
768
+ vrarch_audio = gr.Audio(
769
+ label = "Input Audio",
770
+ type = "numpy",
771
+ interactive = True
772
+ )
773
+ with gr.Accordion("Separation by Link", open = False):
774
+ with gr.Row():
775
+ vrarch_link = gr.Textbox(
776
+ label = "Link",
777
+ placeholder = "Paste the link here",
778
+ interactive = True
779
+ )
780
+ with gr.Row():
781
+ gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
782
+ with gr.Row():
783
+ vrarch_download_button = gr.Button(
784
+ "Download!",
785
+ variant = "primary"
786
+ )
787
+
788
+ vrarch_download_button.click(download_audio, [vrarch_link], [vrarch_audio])
789
+
790
+ with gr.Accordion("Batch Separation", open = False):
791
+ with gr.Row():
792
+ vrarch_input_path = gr.Textbox(
793
+ label = "Input Path",
794
+ placeholder = "Place the input path here",
795
+ interactive = True
796
+ )
797
+ vrarch_output_path = gr.Textbox(
798
+ label = "Output Path",
799
+ placeholder = "Place the output path here",
800
+ interactive = True
801
+ )
802
+ with gr.Row():
803
+ vrarch_bath_button = gr.Button("Separate!", variant = "primary")
804
+ with gr.Row():
805
+ vrarch_info = gr.Textbox(
806
+ label = "Output Information",
807
+ interactive = False
808
+ )
809
+
810
+ vrarch_bath_button.click(vrarch_batch, [vrarch_input_path, vrarch_output_path, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_info])
811
+
812
+ with gr.Row():
813
+ vrarch_button = gr.Button("Separate!", variant = "primary")
814
+ with gr.Row():
815
+ vrarch_stem1 = gr.Audio(
816
+ show_download_button = True,
817
+ interactive = False,
818
+ type = "filepath",
819
+ label = "Stem 1"
820
+ )
821
+ vrarch_stem2 = gr.Audio(
822
+ show_download_button = True,
823
+ interactive = False,
824
+ type = "filepath",
825
+ label = "Stem 2"
826
+ )
827
+
828
+ vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
829
+
830
+ with gr.Tab("Demucs"):
831
+ with gr.Row():
832
+ demucs_model = gr.Dropdown(
833
+ label = "Select the Model",
834
+ choices = demucs_models,
835
+ interactive = True
836
+ )
837
+ demucs_output_format = gr.Dropdown(
838
+ label = "Select the Output Format",
839
+ choices = output_format,
840
+ interactive = True
841
+ )
842
+ with gr.Row():
843
+ demucs_shifts = gr.Slider(
844
+ minimum = 1,
845
+ maximum = 20,
846
+ step = 1,
847
+ label = "Shifts",
848
+ info = "Number of predictions with random shifts, higher = slower but better quality.",
849
+ value = 2,
850
+ interactive = True
851
+ )
852
+ demucs_overlap = gr.Dropdown(
853
+ label = "Overlap",
854
+ choices = demucs_overlap_values,
855
+ value = demucs_overlap_values[0],
856
+ interactive = True
857
+ )
858
+ with gr.Row():
859
+ demucs_audio = gr.Audio(
860
+ label = "Input Audio",
861
+ type = "numpy",
862
+ interactive = True
863
+ )
864
+ with gr.Accordion("Separation by Link", open = False):
865
+ with gr.Row():
866
+ demucs_link = gr.Textbox(
867
+ label = "Link",
868
+ placeholder = "Paste the link here",
869
+ interactive = True
870
+ )
871
+ with gr.Row():
872
+ gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
873
+ with gr.Row():
874
+ demucs_download_button = gr.Button(
875
+ "Download!",
876
+ variant = "primary"
877
+ )
878
+
879
+ demucs_download_button.click(download_audio, [demucs_link], [demucs_audio])
880
+
881
+ with gr.Accordion("Batch Separation", open = False):
882
+ with gr.Row():
883
+ demucs_input_path = gr.Textbox(
884
+ label = "Input Path",
885
+ placeholder = "Place the input path here",
886
+ interactive = True
887
+ )
888
+ demucs_output_path = gr.Textbox(
889
+ label = "Output Path",
890
+ placeholder = "Place the output path here",
891
+ interactive = True
892
+ )
893
+ with gr.Row():
894
+ demucs_bath_button = gr.Button("Separate!", variant = "primary")
895
+ with gr.Row():
896
+ demucs_info = gr.Textbox(
897
+ label = "Output Information",
898
+ interactive = False
899
+ )
900
+
901
+ demucs_bath_button.click(demucs_batch, [demucs_input_path, demucs_output_path, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_info])
902
+
903
+ with gr.Row():
904
+ demucs_button = gr.Button("Separate!", variant = "primary")
905
+ with gr.Row():
906
+ demucs_stem1 = gr.Audio(
907
+ show_download_button = True,
908
+ interactive = False,
909
+ type = "filepath",
910
+ label = "Stem 1"
911
+ )
912
+ demucs_stem2 = gr.Audio(
913
+ show_download_button = True,
914
+ interactive = False,
915
+ type = "filepath",
916
+ label = "Stem 2"
917
+ )
918
+ with gr.Row():
919
+ demucs_stem3 = gr.Audio(
920
+ show_download_button = True,
921
+ interactive = False,
922
+ type = "filepath",
923
+ label = "Stem 3"
924
+ )
925
+ demucs_stem4 = gr.Audio(
926
+ show_download_button = True,
927
+ interactive = False,
928
+ type = "filepath",
929
+ label = "Stem 4"
930
+ )
931
+
932
+ demucs_button.click(demucs_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
933
+
934
+ with gr.Tab("Credits"):
935
+ gr.Markdown(
936
+ """
937
+ UVR5 UI created by **[Eddycrack 864](https://github.com/Eddycrack864).** Join **[AI HUB](https://discord.gg/aihub)** community.
938
+ * python-audio-separator by [beveradb](https://github.com/beveradb).
939
+ * Special thanks to [Ilaria](https://github.com/TheStingerX) for hosting this space and help.
940
+ * Thanks to [Mikus](https://github.com/cappuch) for the help with the code.
941
+ * Thanks to [Nick088](https://huggingface.co/Nick088) for the help to fix roformers.
942
+ * Thanks to [yt_dlp](https://github.com/yt-dlp/yt-dlp) devs.
943
+ * Separation by link source code and improvements by [Blane187](https://huggingface.co/Blane187).
944
+
945
+ You can donate to the original UVR5 project here:
946
+ [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/uvr5)
947
+ """
948
+ )
949
+
950
+ app.queue()
951
+ app.launch()
requirements (1).txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ numpy==1.25.2
2
+ audio-separator[cpu]==0.17.5
3
+ scipy
4
+ yt_dlp