Spark808 committed
Commit 6df2588
1 Parent(s): a388cc2

Update app.py

Files changed (1)
  1. app.py +39 -36
app.py CHANGED
@@ -3,37 +3,46 @@ import json
 import argparse
 import traceback
 import logging
-from datetime import datetime
-
 import gradio as gr
 import numpy as np
 import librosa
 import torch
-
+import asyncio
+import edge_tts
+from datetime import datetime
 from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
-from config import is_half, device
-
+from config import (
+    is_half,
+    device
+)
 logging.getLogger("numba").setLevel(logging.WARNING)
 
-
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
     def vc_fn(
-        vc_microphone,
-        vc_transpose,
-        vc_f0method,
-        vc_index_ratio,
-        vc_output1,
-        vc_output2
+        input_audio,
+        f0_up_key,
+        f0_method,
+        index_rate
     ):
         try:
-            # Get the recorded audio from the microphone
-            audio, sr = vc_microphone()
-
-            # Your existing processing logic for audio
+            if args.files:
+                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
+            else:
+                if input_audio is None:
+                    return "You need to upload an audio", None
+                sampling_rate, audio = input_audio
+                duration = audio.shape[0] / sampling_rate
+                if duration > 10000000:
+                    return "no", None
+                audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+                if len(audio.shape) > 1:
+                    audio = librosa.to_mono(audio.transpose(1, 0))
+                if sampling_rate != 16000:
+                    audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
-            f0_up_key = int(vc_transpose)
+            f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
                 hubert_model,
                 net_g,
@@ -41,27 +50,22 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 audio,
                 times,
                 f0_up_key,
-                vc_f0method,
+                f0_method,
                 file_index,
                 file_big_npy,
-                vc_index_ratio,
+                index_rate,
                 if_f0,
             )
-
             print(
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
-            vc_output1.update("Success")
-            vc_output2.update((tgt_sr, audio_opt))
-        except Exception as e:
+            return "Success", (tgt_sr, audio_opt)
+        except:
             info = traceback.format_exc()
             print(info)
-            vc_output1.update(str(e))
-            vc_output2.update((None, None))
-
+            return info, (None, None)
     return vc_fn
 
-
 def load_hubert():
     global hubert_model
     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
@@ -76,7 +80,6 @@ def load_hubert():
     hubert_model = hubert_model.float()
     hubert_model.eval()
 
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
@@ -110,7 +113,6 @@ if __name__ == '__main__':
         net_g = net_g.float()
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
-
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
@@ -123,14 +125,16 @@ if __name__ == '__main__':
             with gr.Row():
                 gr.Markdown(
                     '<div align="center">'
-                    f'<div>{title}</div>\n' +
-                    (f'<img style="width:auto;height:300px;" src="file/{cover}">' if os.path.exists(cover) else "") +
+                    f'<div>{title}</div>\n'+
+                    (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                     '</div>'
                 )
             with gr.Row():
                 with gr.Column():
-                    # Use microphone instead of file upload
-                    vc_microphone = gr.Microphone(label="Record your voice")
+                    if args.files:
+                        vc_input = gr.Textbox(label="Input audio path")
+                    else:
+                        vc_input = gr.Audio(label="Input audio")
                     vc_transpose = gr.Number(label="Transpose", value=0)
                     vc_f0method = gr.Radio(
                         label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
@@ -149,6 +153,5 @@ if __name__ == '__main__':
                 with gr.Column():
                     vc_output1 = gr.Textbox(label="Output Message")
                     vc_output2 = gr.Audio(label="Output Audio")
-
-            vc_submit.click(vc_fn, vc_microphone, vc_transpose, vc_f0method, vc_index_ratio, vc_output1, vc_output2)
-    app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
+            vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
+    app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
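Two notes on the changes above, with short sketches.

Input handling: with --files set, the new vc_fn loads audio from a path via librosa.load(input_audio, sr=16000, mono=True), which resamples and downmixes in one call; otherwise it takes the (sampling_rate, audio) tuple that gr.Audio produces and normalizes it manually. That manual path can be exercised on its own. A minimal sketch, assuming only numpy and librosa; the helper name prepare_input is illustrative and not part of the commit:

import numpy as np
import librosa

def prepare_input(sampling_rate, audio):
    # Illustrative helper mirroring vc_fn's preprocessing of a Gradio
    # (sampling_rate, audio) tuple. Assumes integer PCM, as the commit does.
    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    # gr.Audio returns (samples, channels) for multi-channel input;
    # librosa.to_mono expects (channels, samples), hence the transpose.
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    # Resample to the 16 kHz rate the HuBERT feature extractor expects.
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    return audio

# Example: a one-second 440 Hz stereo tone at 44.1 kHz, int16 PCM.
sr = 44100
t = np.linspace(0, 1, sr, endpoint=False)
tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
print(prepare_input(sr, np.stack([tone, tone], axis=1)).shape)  # (16000,)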
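Output handling: the previous revision called vc_output1.update(...) and vc_output2.update(...) inside the handler and passed every component to vc_submit.click positionally, which is not how Blocks events are wired; a handler returns one value per output component, and click takes inputs and outputs as lists. A minimal sketch of the corrected pattern, with illustrative component names:

import traceback
import gradio as gr

def handler(text):
    # Return one value per output component, in order (message, result),
    # mirroring vc_fn's "Success"/traceback returns in the commit.
    try:
        return "Success", text.upper()
    except Exception:
        return traceback.format_exc(), None

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Convert")
    msg = gr.Textbox(label="Output Message")
    out = gr.Textbox(label="Output")
    # Inputs and outputs go in lists, matching the corrected
    # vc_submit.click(vc_fn, [...], [vc_output1, vc_output2]).
    btn.click(handler, [inp], [msg, out])

demo.launch()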