Spark808 committed
Commit c6f82b0
1 Parent(s): 6ee22c4

Update app.py

Files changed (1)
  1. app.py +19 -56
app.py CHANGED
@@ -24,34 +24,23 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
         input_audio,
         f0_up_key,
         f0_method,
-        index_rate,
-        tts_mode,
-        tts_text,
-        tts_voice
+        index_rate
     ):
         try:
-            if tts_mode:
-                if len(tts_text) > 1000000 :
-                    return "Text is too long", None
-                if tts_text is None or tts_voice is None:
-                    return "You need to enter text and select a voice", None
-                asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
-                audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
+            if args.files:
+                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
             else:
-                if args.files:
-                    audio, sr = librosa.load(input_audio, sr=16000, mono=True)
-                else:
-                    if input_audio is None:
-                        return "You need to upload an audio", None
-                    sampling_rate, audio = input_audio
-                    duration = audio.shape[0] / sampling_rate
-                    if duration > 10000000 :
-                        return "no", None
-                    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
-                    if len(audio.shape) > 1:
-                        audio = librosa.to_mono(audio.transpose(1, 0))
-                    if sampling_rate != 16000:
-                        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+                if input_audio is None:
+                    return "You need to upload an audio", None
+                sampling_rate, audio = input_audio
+                duration = audio.shape[0] / sampling_rate
+                if duration > 10000000:
+                    return "no", None
+                audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+                if len(audio.shape) > 1:
+                    audio = librosa.to_mono(audio.transpose(1, 0))
+                if sampling_rate != 16000:
+                    audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
@@ -91,14 +80,6 @@ def load_hubert():
     hubert_model = hubert_model.float()
     hubert_model.eval()
 
-def change_to_tts_mode(tts_mode):
-    if tts_mode:
-        return gr.Audio.update(visible=False), gr.Textbox.update(visible=True), gr.Dropdown.update(visible=True)
-    else:
-        return gr.Audio.update(visible=True), gr.Textbox.update(visible=False), gr.Dropdown.update(visible=False)
-def save_audio(audio):
-    return audio
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--api', action="store_true", default=False)
@@ -107,35 +88,31 @@ if __name__ == '__main__':
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
-    tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
-    voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
     with open("weights/model_info.json", "r", encoding="utf-8") as f:
         models_info = json.load(f)
     for name, info in models_info.items():
         if not info['enable']:
             continue
         title = info['title']
-        author = info.get("author", None)
         cover = f"weights/{name}/{info['cover']}"
         index = f"weights/{name}/{info['feature_retrieval_library']}"
         npy = f"weights/{name}/{info['feature_file']}"
         cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
         tgt_sr = cpt["config"][-1]
-        cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
         if_f0 = cpt.get("f0", 1)
         if if_f0 == 1:
             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
         else:
             net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
         del net_g.enc_q
-        print(net_g.load_state_dict(cpt["weight"], strict=False))  # without this line the state dict doesn't get cleaned up properly; really odd
+        print(net_g.load_state_dict(cpt["weight"], strict=False))
         net_g.eval().to(device)
         if is_half:
             net_g = net_g.half()
         else:
             net_g = net_g.float()
         vc = VC(tgt_sr, device, is_half)
-        models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
+        models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
@@ -143,13 +120,12 @@ if __name__ == '__main__':
            "[![buymeacoffee](https://badgen.net/badge/icon/buymeacoffee?icon=buymeacoffee&label)](https://www.buymeacoffee.com/spark808)\n\n"
         )
         with gr.Tabs():
-            for (name, title, author, cover, vc_fn) in models:
+            for (name, title, cover, vc_fn) in models:
                 with gr.TabItem(name):
                     with gr.Row():
                         gr.Markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n'+
-                            (f'<div>Model author: {author}</div>' if author else "")+
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                             '</div>'
                         )
@@ -173,22 +149,9 @@ if __name__ == '__main__':
                                 value=0.6,
                                 interactive=True,
                             )
-                            tts_mode = gr.Checkbox(label="tts (use edge-tts as input)", value=False)
-                            tts_text = gr.Textbox(visible=False, label="TTS text (10000000 words limitation)" "TTS text")
-                            tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=False, allow_custom_value=False, value="en-US-AnaNeural-Female")
                             vc_submit = gr.Button("Generate", variant="primary")
                         with gr.Column():
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
-                    vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
-                    tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
-        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
-    iface = gr.Interface(
-        fn=save_audio,
-        inputs=gr.inputs.Audio(source="microphone", type="file"),
-        outputs="audio",
-        title="Voice Recorder",
-        description="Press the Record button to record your voice, and then press Stop when you're done.",
-    )
-
-    iface.launch()
+                    vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
+        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
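Note: with this commit the app no longer generates speech input itself; the edge-tts path and the standalone voice-recorder interface are gone. If TTS input is still needed, the same effect can be reproduced outside the app. The sketch below is a minimal, hedged example that reuses the calls the deleted code relied on (`edge_tts.Communicate(...).save(...)` and `librosa.load(..., sr=16000, mono=True)`); the helper name `synthesize`, the voice name, and the output filename are illustrative choices, not part of the app.

```python
# Standalone sketch of the removed TTS input path (assumes: pip install edge-tts librosa).
import asyncio

import edge_tts
import librosa


async def synthesize(text: str, voice: str = "en-US-AnaNeural", out_path: str = "tts.mp3") -> str:
    # edge-tts writes the synthesized speech to an mp3 file on disk.
    await edge_tts.Communicate(text, voice).save(out_path)
    return out_path


if __name__ == "__main__":
    path = asyncio.run(synthesize("Hello from edge-tts"))
    # Load as 16 kHz mono float32, the format vc_fn expects when the app is run with --files.
    audio, sr = librosa.load(path, sr=16000, mono=True)
    print(audio.shape, sr)
```

The resulting file (or the loaded array wrapped in a Gradio audio tuple) can then be passed to the app's audio input as usual.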