aliceoq committed on
Commit
f40dace
1 Parent(s): 839be56

separate vocals, add progress bar, show selected audio

Browse files
Files changed (3) hide show
  1. app.py +88 -49
  2. audios/pica_pau_bolo_de_murango.m4a +0 -0
  3. requirements.txt +1 -0
app.py CHANGED
@@ -2,7 +2,7 @@ import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np
2
  from mega import Mega
3
  os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
4
  import threading
5
- from time import sleep
6
  from subprocess import Popen
7
  import datetime, requests
8
  now_dir = os.getcwd()
@@ -31,6 +31,8 @@ from vc_infer_pipeline import VC
31
  from config import Config
32
 
33
  from utils import load_audio, CSVutil
 
 
34
 
35
  DoFormant = False
36
  Quefrency = 1.0
@@ -201,17 +203,33 @@ for root, dirs, files in os.walk(index_root, topdown=False):
201
  index_paths.append("%s/%s" % (root, name))
202
 
203
  def vc_single(
204
- input_audio_path,
 
 
205
  ):
 
 
 
206
  global tgt_sr, net_g, vc, hubert_model, version
207
  if input_audio_path is None:
208
  return "You need to upload an audio", None
209
  try:
 
 
 
 
 
 
 
 
 
 
 
210
  audio = load_audio(input_audio_path, 16000, DoFormant, Quefrency, Timbre)
211
  audio_max = np.abs(audio).max() / 0.95
212
  if audio_max > 1:
213
  audio /= audio_max
214
- times = [0, 0, 0]
215
  if hubert_model == None:
216
  load_hubert()
217
  if_f0 = cpt.get("f0", 1)
@@ -226,6 +244,7 @@ def vc_single(
226
  .replace("trained", "added")
227
  )
228
  )
 
229
  audio_opt = vc.pipeline(
230
  hubert_model,
231
  net_g,
@@ -245,21 +264,22 @@ def vc_single(
245
  version,
246
  protect,
247
  crepe_hop_length,
 
248
  f0_file=None,
249
  )
 
250
  if resample_sr >= 16000 and tgt_sr != resample_sr:
251
  tgt_sr = resample_sr
252
- index_info = (
253
- "Using index:%s." % file_index
254
- if os.path.exists(file_index)
255
- else "Index not used."
256
- )
257
- return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
258
- index_info,
259
- times[0],
260
- times[1],
261
- times[2],
262
- ), (tgt_sr, audio_opt)
263
  except:
264
  info = traceback.format_exc()
265
  print(info)
@@ -269,13 +289,12 @@ def get_vc(sid):
269
  global n_spk, tgt_sr, net_g, vc, cpt, version
270
  if sid == "" or sid == []:
271
  global hubert_model
272
- if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
273
  print("clean_empty_cache")
274
  del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt
275
  hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
276
  if torch.cuda.is_available():
277
  torch.cuda.empty_cache()
278
- ###楼下不这么折腾清理不干净
279
  if_f0 = cpt.get("f0", 1)
280
  version = cpt.get("version", "v1")
281
  if version == "v1":
@@ -379,30 +398,6 @@ def save_to_wav2(dropbox):
379
  file_path=dropbox.name
380
  shutil.move(file_path,'./audios')
381
  return os.path.join('./audios',os.path.basename(file_path))
382
-
383
-
384
- def match_index(sid0):
385
- folder=sid0.split(".")[0]
386
- parent_dir="./logs/"+folder
387
- if os.path.exists(parent_dir):
388
- for filename in os.listdir(parent_dir):
389
- if filename.endswith(".index"):
390
- index_path=os.path.join(parent_dir,filename)
391
- return index_path
392
- else:
393
- return ''
394
-
395
-
396
- def match_index(sid0):
397
- folder=sid0.split(".")[0]
398
- parent_dir="./logs/"+folder
399
- if os.path.exists(parent_dir):
400
- for filename in os.listdir(parent_dir):
401
- if filename.endswith(".index"):
402
- index_path=os.path.join(parent_dir,filename)
403
- return index_path
404
- else:
405
- return ''
406
 
407
  def check_for_name():
408
  if len(names) > 0:
@@ -457,9 +452,52 @@ def download_from_youtube(url):
457
  pass
458
  filename = subprocess.getoutput(f'yt-dlp --print filename {url} --format m4a -o "./audios/%(title)s.%(ext)s"')
459
  subprocess.getoutput(f'yt-dlp {url} --format m4a -o "./audios/%(title)s.%(ext)s"')
460
- if os.path.exists(filename[1:]):
461
  return filename
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  css = """
464
  .padding {padding-left: 15px; padding-top: 5px;}
465
  """
@@ -476,7 +514,7 @@ with gr.Blocks(theme = gr.themes.Base(), title="Vocais da Loirinha 👱🏻‍
476
  with gr.Row().style(equal_height=True):
477
  with gr.Column():
478
  with gr.Row():
479
- model_dropdown = gr.Dropdown(label="1. Escolha a voz:", choices=sorted(names), value=check_for_name())
480
  if check_for_name() != '':
481
  get_vc(sorted(names)[0])
482
  model_dropdown.change(
@@ -488,12 +526,12 @@ with gr.Blocks(theme = gr.themes.Base(), title="Vocais da Loirinha 👱🏻‍
488
  yt_link_textbox = gr.Textbox(label="Insira um link para uma música no Youtube:")
489
  download_yt_button = gr.Button("Baixar áudio do vídeo")
490
  dropbox = gr.File(label="OU selecione um arquivo:")
491
- record_button = gr.Audio(source="microphone", label="OR grave o áudio:", type="filepath")
492
 
493
  with gr.Column():
494
  with gr.Row():
495
  audio_dropdown = gr.Dropdown(
496
- label="3. Escolha o áudio",
497
  value="",
498
  choices=audio_files,
499
  scale=1
@@ -507,16 +545,17 @@ with gr.Blocks(theme = gr.themes.Base(), title="Vocais da Loirinha 👱🏻‍
507
  record_button.change(fn=change_choices2, inputs=[], outputs=[audio_dropdown])
508
  refresh_button.click(fn=update_dropdowns, inputs=[], outputs=[model_dropdown, audio_dropdown])
509
  selected_audio = gr.Audio(label="Áudio selecionado", interactive=False)
 
510
  separate_checkbox = gr.Checkbox(label="Separar vocais e instrumental",
511
- info="Se os vocais não estiverem isolados no áudio selecionado, ative esta opção. Os vocais serão extraídos durante a conversão e depois reintegrados ao áudio final com os instrumentais.")
512
- convert_button = gr.Button("Convert", variant="primary")
513
  output_audio = gr.Audio(
514
- label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
515
  type='filepath',
516
  interactive=False,
517
  )
518
- output_audio_textbox = gr.Textbox(label="Resultado", interactive=False, placeholder="Nenhum áudio gerado.")
519
- convert_button.click(vc_single, [audio_dropdown], [output_audio_textbox, output_audio])
520
 
521
  with gr.TabItem("Adicione uma voz"):
522
  with gr.Column():
 
2
  from mega import Mega
3
  os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
4
  import threading
5
+ from time import time
6
  from subprocess import Popen
7
  import datetime, requests
8
  now_dir = os.getcwd()
 
31
  from config import Config
32
 
33
  from utils import load_audio, CSVutil
34
+ import demucs.separate
35
+ import audiosegment
36
 
37
  DoFormant = False
38
  Quefrency = 1.0
 
203
  index_paths.append("%s/%s" % (root, name))
204
 
205
  def vc_single(
206
+ input_audio,
207
+ separate_vocals_bool,
208
+ progress = gr.Progress()
209
  ):
210
+ progress(0, desc="Preparando áudio...")
211
+ overlay_audios_bool = False
212
+ input_audio_path = input_audio
213
  global tgt_sr, net_g, vc, hubert_model, version
214
  if input_audio_path is None:
215
  return "You need to upload an audio", None
216
  try:
217
+ t1 = 0
218
+ t2 = 0
219
+ if (separate_vocals_bool):
220
+ t1 = time()
221
+ progress(0.1, desc="Separando vocais...")
222
+ path_to_separated_vocals = separate_vocals(input_audio_path)
223
+ if (path_to_separated_vocals):
224
+ input_audio_path = path_to_separated_vocals
225
+ overlay_audios_bool = True
226
+ t2 = time()
227
+ progress(0.2, desc="Carregando áudio...")
228
  audio = load_audio(input_audio_path, 16000, DoFormant, Quefrency, Timbre)
229
  audio_max = np.abs(audio).max() / 0.95
230
  if audio_max > 1:
231
  audio /= audio_max
232
+ times = [0, 0, 0, t2 - t1, 0]
233
  if hubert_model == None:
234
  load_hubert()
235
  if_f0 = cpt.get("f0", 1)
 
244
  .replace("trained", "added")
245
  )
246
  )
247
+ progress(0.3, desc="Gerando áudio...")
248
  audio_opt = vc.pipeline(
249
  hubert_model,
250
  net_g,
 
264
  version,
265
  protect,
266
  crepe_hop_length,
267
+ progress,
268
  f0_file=None,
269
  )
270
+ progress(0.8, desc="Áudio convertido...")
271
  if resample_sr >= 16000 and tgt_sr != resample_sr:
272
  tgt_sr = resample_sr
273
+ if (overlay_audios_bool):
274
+ t1 = time()
275
+ progress(0.9, desc="Juntando vocal e instrumental...")
276
+ (tgt_sr, audio_opt) = overlay_audios(tgt_sr, audio_opt, input_audio_path.replace("vocals", "no_vocals"))
277
+ remove_separated_files(input_audio_path)
278
+ t2 = time()
279
+ times[4] = t2 - t1
280
+ return {"visible": True, "__type__": "update", "value": "Áudio convertido com sucesso!\nTempo: %1fs" % (
281
+ sum(times),
282
+ )}, (tgt_sr, audio_opt)
 
283
  except:
284
  info = traceback.format_exc()
285
  print(info)
 
289
  global n_spk, tgt_sr, net_g, vc, cpt, version
290
  if sid == "" or sid == []:
291
  global hubert_model
292
+ if hubert_model != None:
293
  print("clean_empty_cache")
294
  del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt
295
  hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
296
  if torch.cuda.is_available():
297
  torch.cuda.empty_cache()
 
298
  if_f0 = cpt.get("f0", 1)
299
  version = cpt.get("version", "v1")
300
  if version == "v1":
 
398
  file_path=dropbox.name
399
  shutil.move(file_path,'./audios')
400
  return os.path.join('./audios',os.path.basename(file_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
 
402
  def check_for_name():
403
  if len(names) > 0:
 
452
  pass
453
  filename = subprocess.getoutput(f'yt-dlp --print filename {url} --format m4a -o "./audios/%(title)s.%(ext)s"')
454
  subprocess.getoutput(f'yt-dlp {url} --format m4a -o "./audios/%(title)s.%(ext)s"')
455
+ if os.path.exists(filename):
456
  return filename
457
 
458
def find_vocals(root_directory, target_folder_name, file_name='vocals.wav'):
    """Locate ``file_name`` inside a folder named ``target_folder_name``.

    Walks ``root_directory`` and, for every directory that directly contains
    a sub-folder called ``target_folder_name``, checks whether that
    sub-folder holds ``file_name``.

    Returns the path of the first such file that exists, or ``None`` when the
    walk finishes without finding one.
    """
    # Lazily build one candidate path per matching folder; nothing is
    # touched on disk until ``next`` starts pulling from the generator.
    candidates = (
        os.path.join(current_dir, target_folder_name, file_name)
        for current_dir, subdirs, _ in os.walk(root_directory)
        if target_folder_name in subdirs
    )
    return next((path for path in candidates if os.path.exists(path)), None)
466
+
467
def separate_vocals(audio_path):
    """Run Demucs two-stem separation on ``audio_path`` and find the vocals.

    The stems are written under ``./audios`` and the resulting
    ``vocals.wav`` is then looked up with ``find_vocals``.

    Args:
        audio_path: Path of the audio file to separate.

    Returns:
        The path of the separated vocals file, or ``None`` when
        ``audio_path`` does not exist, has no usable file name, or no vocals
        file is found after separation.
    """
    # Derive the track name from the path itself. The previous fixed slice
    # (``audio_path[9:-4]``) was only right for "./audios/<name>.<ext>" with
    # a three-letter extension, so e.g. ".flac" or ".webm" inputs produced a
    # name that never matched the output folder.
    # NOTE(review): assumes Demucs names the output folder after the input
    # file's stem - confirm against the installed Demucs version.
    audio_name = os.path.splitext(os.path.basename(audio_path))[0]
    if not audio_name or not os.path.exists(audio_path):
        return None
    demucs.separate.main(["--two-stems", "vocals", audio_path, "-o", './audios'])
    # ``find_vocals`` already yields ``None`` when nothing matches.
    return find_vocals('./audios', audio_name)
475
+
476
# NOTE: this part is still not 100% right.
def overlay_audios(sample_rate, np_array, accompaniment_path):
    """Overlay the audio in ``np_array`` with the file at ``accompaniment_path``.

    Args:
        sample_rate: Sample rate passed along with ``np_array``.
        np_array: Audio samples handed to ``audiosegment.from_numpy_array``.
        accompaniment_path: Path of the audio file to mix in.

    Returns:
        A ``(frame_rate, samples)`` tuple for the mixed audio, or the
        untouched ``(sample_rate, np_array)`` pair when
        ``accompaniment_path`` does not exist.
    """
    if os.path.exists(accompaniment_path):
        vocals = audiosegment.from_numpy_array(np_array, sample_rate)
        backing = audiosegment.from_file(accompaniment_path)
        # Both tracks start together: the backing track is laid at offset 0.
        mixed = vocals.overlay(backing, position=0)
        return (mixed.frame_rate, mixed.to_numpy_array())
    # Nothing to mix with, so hand the input back as-is.
    return (sample_rate, np_array)
484
+
485
def remove_separated_files(vocals_path):
    """Delete the folder that contains ``vocals_path``, with all its contents.

    Best-effort cleanup: failures are printed rather than raised, so the
    caller never sees an exception from this function.
    """
    parent_dir = os.path.dirname(vocals_path)
    try:
        shutil.rmtree(parent_dir)
    except FileNotFoundError:
        print(f"{parent_dir} folder not found")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    else:
        # Only reached when the removal succeeded.
        print(f"Deleted {parent_dir} folder and its contents")
494
+
495
def hide_output_text():
    """Build an update payload that hides a component and clears its value.

    Returns a plain dict with ``visible`` set to ``False``, ``value`` set to
    an empty string, and the ``__type__`` marker set to ``"update"``.
    """
    payload = {"visible": False}
    payload["__type__"] = "update"
    payload["value"] = ""
    return payload
497
+
498
def show_selected_audio(input_audio_path):
    """Return ``input_audio_path`` unchanged.

    Acts as a pass-through callback: whatever value it receives is handed
    straight back to the caller.
    """
    selected = input_audio_path
    return selected
500
+
501
  css = """
502
  .padding {padding-left: 15px; padding-top: 5px;}
503
  """
 
514
  with gr.Row().style(equal_height=True):
515
  with gr.Column():
516
  with gr.Row():
517
+ model_dropdown = gr.Dropdown(label="1. Selecione a voz:", choices=sorted(names), value=check_for_name())
518
  if check_for_name() != '':
519
  get_vc(sorted(names)[0])
520
  model_dropdown.change(
 
526
  yt_link_textbox = gr.Textbox(label="Insira um link para uma música no Youtube:")
527
  download_yt_button = gr.Button("Baixar áudio do vídeo")
528
  dropbox = gr.File(label="OU selecione um arquivo:")
529
+ record_button = gr.Audio(source="microphone", label="OU grave o áudio:", type="filepath")
530
 
531
  with gr.Column():
532
  with gr.Row():
533
  audio_dropdown = gr.Dropdown(
534
+ label="3. Selecione o áudio",
535
  value="",
536
  choices=audio_files,
537
  scale=1
 
545
  record_button.change(fn=change_choices2, inputs=[], outputs=[audio_dropdown])
546
  refresh_button.click(fn=update_dropdowns, inputs=[], outputs=[model_dropdown, audio_dropdown])
547
  selected_audio = gr.Audio(label="Áudio selecionado", interactive=False)
548
+ audio_dropdown.select(show_selected_audio, inputs=[audio_dropdown], outputs=[selected_audio])
549
  separate_checkbox = gr.Checkbox(label="Separar vocais e instrumental",
550
+ info="Marque esta opção quando o áudio selecionado NÃO tiver a voz isolada. Os vocais serão extraídos para a conversão e depois reintegrados ao áudio final com os instrumentais. ⚠️ O tempo de conversão pode aumentar significamente com essa opção ativada.")
551
+ convert_button = gr.Button("Gerar áudio", variant="primary")
552
  output_audio = gr.Audio(
553
+ label="Áudio convertido (Clique nos três pontos para fazer o download)",
554
  type='filepath',
555
  interactive=False,
556
  )
557
+ output_audio_textbox = gr.Textbox(label="Resultado", interactive=False, visible=True, placeholder="Nenhum áudio gerado.")
558
+ convert_button.click(hide_output_text, outputs=[output_audio_textbox]).then(vc_single, [audio_dropdown, separate_checkbox], [output_audio_textbox, output_audio])
559
 
560
  with gr.TabItem("Adicione uma voz"):
561
  with gr.Column():
audios/pica_pau_bolo_de_murango.m4a DELETED
Binary file (64 kB)
 
requirements.txt CHANGED
@@ -21,3 +21,4 @@ onnxruntime
21
  pyngrok==4.1.12
22
  torch
23
  yt-dlp==2023.07.06
 
 
21
  pyngrok==4.1.12
22
  torch
23
  yt-dlp==2023.07.06
24
+ audiosegment==0.23.0