ArkanDash committed on
Commit
e9edf68
1 Parent(s): 65c00af

feat(gcolab): youtube feature

Browse files
Files changed (1) hide show
  1. app.py +54 -1
app.py CHANGED
@@ -9,6 +9,12 @@ import librosa
9
  import torch
10
  import asyncio
11
  import edge_tts
 
 
 
 
 
 
12
  from datetime import datetime
13
  from fairseq import checkpoint_utils
14
  from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
@@ -78,6 +84,42 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
78
  return info, (None, None)
79
  return vc_fn
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def load_hubert():
82
  global hubert_model
83
  models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
@@ -142,7 +184,6 @@ if __name__ == '__main__':
142
  "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=ArkanDash.Rvc-Models)\n\n"
143
  "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n\n"
144
  "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
145
-
146
  )
147
  with gr.Tabs():
148
  for (name, title, author, cover, vc_fn) in models:
@@ -156,6 +197,13 @@ if __name__ == '__main__':
156
  '</div>'
157
  )
158
  with gr.Row():
 
 
 
 
 
 
 
159
  with gr.Column():
160
  if args.files:
161
  vc_input = gr.Textbox(label="Input audio path")
@@ -182,6 +230,11 @@ if __name__ == '__main__':
182
  with gr.Column():
183
  vc_output1 = gr.Textbox(label="Output Message")
184
  vc_output2 = gr.Audio(label="Output Audio")
 
 
185
  vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
186
  tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
 
 
 
187
  app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
 
9
  import torch
10
  import asyncio
11
  import edge_tts
12
+ import yt_dlp
13
+ import ffmpeg
14
+ import subprocess
15
+ import sys
16
+ import io
17
+ import wave
18
  from datetime import datetime
19
  from fairseq import checkpoint_utils
20
  from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 
84
  return info, (None, None)
85
  return vc_fn
86
 
87
def cut_vocal_and_inst(yt_url):
    """Download audio from a YouTube URL and split it into vocal/instrumental stems.

    Uses yt-dlp to fetch the best audio track as WAV, then shells out to
    demucs (two-stem mode) to separate the vocals from the instrumental.

    Parameters
    ----------
    yt_url : str
        YouTube video URL. An empty string means "nothing to do".

    Returns
    -------
    tuple
        (vocal_path, instrumental_path, original_audio_path, vocal_path) —
        the vocal path appears twice so it can feed both the preview player
        and the conversion input textbox wired to this callback. All four
        elements are None when yt_url is empty.
    """
    # Empty URL: return one None per wired Gradio output. The original fell
    # through and returned a bare None, which breaks unpacking into the four
    # outputs registered on vc_convert.click(...).
    if yt_url == "":
        return None, None, None, None
    # Hard-coded Colab working directory — this app is meant to run there.
    os.makedirs("/content/youtube_audio", exist_ok=True)
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        # yt-dlp's postprocessor appends the codec extension -> audio.wav
        "outtmpl": '/content/youtube_audio/audio',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    yt_audio_path = "/content/youtube_audio/audio.wav"
    # demucs writes stems under ./separated/htdemucs/<name>/ relative to the
    # current working directory (assumed here to be /content/rvc-models).
    command = f"demucs --two-stems=vocals {yt_audio_path}"
    result = subprocess.run(command.split(), stdout=subprocess.PIPE)
    print(result.stdout.decode())
    vocal_path = "/content/rvc-models/separated/htdemucs/audio/vocals.wav"
    inst_path = "/content/rvc-models/separated/htdemucs/audio/no_vocals.wav"
    return vocal_path, inst_path, yt_audio_path, vocal_path
106
+
107
def combine_vocal_and_inst(audio_data):
    """Mix the converted vocal track with the previously separated instrumental.

    Writes the converted vocal samples to a WAV file, then runs ffmpeg to
    mix it with the demucs instrumental stem into a single MP3.

    Parameters
    ----------
    audio_data : tuple or None
        (sample_rate, samples) as produced by the vc output gr.Audio.
        NOTE(review): the wave parameters below assume mono int16 samples
        with a .tobytes() method (e.g. a numpy array) — confirm against
        what vc_fn actually returns. None when no conversion has run yet.

    Returns
    -------
    str or None
        Path of the combined MP3, or None when there is nothing to combine.

    Raises
    ------
    subprocess.CalledProcessError
        If the ffmpeg mix fails.
    """
    print(audio_data)
    # Guard: "Combine" can be clicked before any conversion, in which case
    # the gr.Audio input is None and the original code crashed on
    # audio_data[0] below.
    if audio_data is None:
        return None
    os.makedirs("/content/result", exist_ok=True)
    vocal_path = "/content/result/output.wav"
    # Instrumental stem left behind by cut_vocal_and_inst / demucs.
    inst_path = "/content/rvc-models/separated/htdemucs/audio/no_vocals.wav"
    output_path = "/content/result/combine.mp3"
    with wave.open(vocal_path, "w") as wave_file:
        wave_file.setnchannels(1)   # mono
        wave_file.setsampwidth(2)   # 16-bit PCM
        wave_file.setframerate(audio_data[0])
        wave_file.writeframes(audio_data[1].tobytes())
    command = (
        f'ffmpeg -y -i {inst_path} -i {vocal_path} '
        f'-filter_complex amix=inputs=2:duration=longest '
        f'-b:a 320k -c:a libmp3lame {output_path}'
    )
    # check=True: surface a failed mix instead of silently returning a path
    # to a file that was never written (the original ignored the result).
    subprocess.run(command.split(), stdout=subprocess.PIPE, check=True)
    return output_path
122
+
123
  def load_hubert():
124
  global hubert_model
125
  models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
 
184
  "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=ArkanDash.Rvc-Models)\n\n"
185
  "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hx6kKvIuv5XNY1Gai2PEuZhpO5z6xpVh?usp=sharing)\n\n"
186
  "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
 
187
  )
188
  with gr.Tabs():
189
  for (name, title, author, cover, vc_fn) in models:
 
197
  '</div>'
198
  )
199
  with gr.Row():
200
+ if args.files:
201
+ with gr.Column():
202
+ vc_youtube = gr.Textbox(label="Youtube URL")
203
+ vc_convert = gr.Button("Convert", variant="primary")
204
+ vc_vocal_preview = gr.Audio(label="Vocal Preview")
205
+ vc_inst_preview = gr.Audio(label="Instrumental Preview")
206
+ vc_audio_preview = gr.Audio(label="Audio Preview")
207
  with gr.Column():
208
  if args.files:
209
  vc_input = gr.Textbox(label="Input audio path")
 
230
  with gr.Column():
231
  vc_output1 = gr.Textbox(label="Output Message")
232
  vc_output2 = gr.Audio(label="Output Audio")
233
+ vc_combine = gr.Button("Combine",variant="primary")
234
+ vc_outputCombine = gr.Audio(label="Output Combined Audio")
235
  vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
236
  tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
237
+ if args.files:
238
+ vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
239
+ vc_combine.click(combine_vocal_and_inst, vc_output2, vc_outputCombine)
240
  app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)