Spark808 committed on
Commit
6a666dd
1 Parent(s): 6df2588

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -38
app.py CHANGED
@@ -3,46 +3,30 @@ import json
3
  import argparse
4
  import traceback
5
  import logging
 
 
6
  import gradio as gr
7
  import numpy as np
8
  import librosa
9
  import torch
10
- import asyncio
11
- import edge_tts
12
- from datetime import datetime
13
  from fairseq import checkpoint_utils
14
  from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
15
  from vc_infer_pipeline import VC
16
- from config import (
17
- is_half,
18
- device
19
- )
20
  logging.getLogger("numba").setLevel(logging.WARNING)
21
 
 
22
  def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
23
- def vc_fn(
24
- input_audio,
25
- f0_up_key,
26
- f0_method,
27
- index_rate
28
- ):
29
  try:
30
- if args.files:
31
- audio, sr = librosa.load(input_audio, sr=16000, mono=True)
32
- else:
33
- if input_audio is None:
34
- return "You need to upload an audio", None
35
- sampling_rate, audio = input_audio
36
- duration = audio.shape[0] / sampling_rate
37
- if duration > 10000000:
38
- return "no", None
39
- audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
40
- if len(audio.shape) > 1:
41
- audio = librosa.to_mono(audio.transpose(1, 0))
42
- if sampling_rate != 16000:
43
- audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
44
  times = [0, 0, 0]
45
- f0_up_key = int(f0_up_key)
46
  audio_opt = vc.pipeline(
47
  hubert_model,
48
  net_g,
@@ -50,12 +34,13 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
50
  audio,
51
  times,
52
  f0_up_key,
53
- f0_method,
54
  file_index,
55
  file_big_npy,
56
- index_rate,
57
  if_f0,
58
  )
 
59
  print(
60
  f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
61
  )
@@ -64,8 +49,10 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
64
  info = traceback.format_exc()
65
  print(info)
66
  return info, (None, None)
 
67
  return vc_fn
68
 
 
69
  def load_hubert():
70
  global hubert_model
71
  models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
@@ -80,6 +67,7 @@ def load_hubert():
80
  hubert_model = hubert_model.float()
81
  hubert_model.eval()
82
 
 
83
  if __name__ == '__main__':
84
  parser = argparse.ArgumentParser()
85
  parser.add_argument('--api', action="store_true", default=False)
@@ -113,6 +101,7 @@ if __name__ == '__main__':
113
  net_g = net_g.float()
114
  vc = VC(tgt_sr, device, is_half)
115
  models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
 
116
  with gr.Blocks() as app:
117
  gr.Markdown(
118
  "# <center> RVC generator\n"
@@ -125,16 +114,14 @@ if __name__ == '__main__':
125
  with gr.Row():
126
  gr.Markdown(
127
  '<div align="center">'
128
- f'<div>{title}</div>\n'+
129
- (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
130
  '</div>'
131
  )
132
  with gr.Row():
133
  with gr.Column():
134
- if args.files:
135
- vc_input = gr.Textbox(label="Input audio path")
136
- else:
137
- vc_input = gr.Audio(label="Input audio")
138
  vc_transpose = gr.Number(label="Transpose", value=0)
139
  vc_f0method = gr.Radio(
140
  label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
@@ -153,5 +140,6 @@ if __name__ == '__main__':
153
  with gr.Column():
154
  vc_output1 = gr.Textbox(label="Output Message")
155
  vc_output2 = gr.Audio(label="Output Audio")
156
- vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
157
- app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
 
 
3
  import argparse
4
  import traceback
5
  import logging
6
+ from datetime import datetime
7
+
8
  import gradio as gr
9
  import numpy as np
10
  import librosa
11
  import torch
12
+
 
 
13
  from fairseq import checkpoint_utils
14
  from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
15
  from vc_infer_pipeline import VC
16
+ from config import is_half, device
17
+
 
 
18
  logging.getLogger("numba").setLevel(logging.WARNING)
19
 
20
+
21
  def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
22
+ def vc_fn(vc_transpose, vc_f0method, vc_index_ratio):
 
 
 
 
 
23
  try:
24
+ # Get the recorded audio from the microphone
25
+ audio, sr = vc_microphone.record(num_frames=16000) # Adjust the sample rate if needed
26
+
27
+ # Your existing processing logic for audio
 
 
 
 
 
 
 
 
 
 
28
  times = [0, 0, 0]
29
+ f0_up_key = int(vc_transpose)
30
  audio_opt = vc.pipeline(
31
  hubert_model,
32
  net_g,
 
34
  audio,
35
  times,
36
  f0_up_key,
37
+ vc_f0method,
38
  file_index,
39
  file_big_npy,
40
+ vc_index_ratio,
41
  if_f0,
42
  )
43
+
44
  print(
45
  f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
46
  )
 
49
  info = traceback.format_exc()
50
  print(info)
51
  return info, (None, None)
52
+
53
  return vc_fn
54
 
55
+
56
  def load_hubert():
57
  global hubert_model
58
  models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
 
67
  hubert_model = hubert_model.float()
68
  hubert_model.eval()
69
 
70
+
71
  if __name__ == '__main__':
72
  parser = argparse.ArgumentParser()
73
  parser.add_argument('--api', action="store_true", default=False)
 
101
  net_g = net_g.float()
102
  vc = VC(tgt_sr, device, is_half)
103
  models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
104
+
105
  with gr.Blocks() as app:
106
  gr.Markdown(
107
  "# <center> RVC generator\n"
 
114
  with gr.Row():
115
  gr.Markdown(
116
  '<div align="center">'
117
+ f'<div>{title}</div>\n' +
118
+ (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "") +
119
  '</div>'
120
  )
121
  with gr.Row():
122
  with gr.Column():
123
+ # Use microphone instead of file upload
124
+ vc_microphone = gr.Microphone(label="Record your voice")
 
 
125
  vc_transpose = gr.Number(label="Transpose", value=0)
126
  vc_f0method = gr.Radio(
127
  label="Pitch extraction algorithm, PM is fast but Harvest is better for low frequencies",
 
140
  with gr.Column():
141
  vc_output1 = gr.Textbox(label="Output Message")
142
  vc_output2 = gr.Audio(label="Output Audio")
143
+
144
+ vc_submit.click(vc_fn, [vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
145
+ app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)