Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
upgrade gradio version
Browse files
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: πποΈ
|
|
4 |
colorFrom: red
|
5 |
colorTo: pink
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
4 |
colorFrom: red
|
5 |
colorTo: pink
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.36.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
app.py
CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
|
|
8 |
import librosa
|
9 |
import numpy as np
|
10 |
import torch
|
|
|
11 |
from torch import no_grad, LongTensor
|
12 |
import commons
|
13 |
import utils
|
@@ -20,18 +21,6 @@ from mel_processing import spectrogram_torch
|
|
20 |
|
21 |
limitation = os.getenv("SYSTEM") == "spaces" # limit text and audio length in huggingface spaces
|
22 |
|
23 |
-
audio_postprocess_ori = gr.Audio.postprocess
|
24 |
-
|
25 |
-
|
26 |
-
def audio_postprocess(self, y):
|
27 |
-
data = audio_postprocess_ori(self, y)
|
28 |
-
if data is None:
|
29 |
-
return None
|
30 |
-
return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
|
31 |
-
|
32 |
-
|
33 |
-
gr.Audio.postprocess = audio_postprocess
|
34 |
-
|
35 |
|
36 |
def get_text(text, hps, is_symbol):
|
37 |
text_norm = text_to_sequence(text, hps.symbols, [] if is_symbol else hps.data.text_cleaners)
|
@@ -99,10 +88,7 @@ def create_vc_fn(model, hps, speaker_ids):
|
|
99 |
|
100 |
|
101 |
def create_soft_vc_fn(model, hps, speaker_ids):
|
102 |
-
def soft_vc_fn(target_speaker,
|
103 |
-
input_audio = input_audio1
|
104 |
-
if input_audio is None:
|
105 |
-
input_audio = input_audio2
|
106 |
if input_audio is None:
|
107 |
return "You need to upload an audio", None
|
108 |
sampling_rate, audio = input_audio
|
@@ -206,7 +192,7 @@ if __name__ == '__main__':
|
|
206 |
|
207 |
with app:
|
208 |
gr.Markdown("# Moe TTS And Voice Conversion Using VITS Model\n\n"
|
209 |
-
"![visitor badge](https://
|
210 |
"[Open In Colab]"
|
211 |
"(https://colab.research.google.com/drive/14Pb8lpmwZL-JI5Ub6jpG4sz2-8KS0kbS?usp=sharing)"
|
212 |
" without queue and length limitation.\n\n"
|
@@ -230,7 +216,7 @@ if __name__ == '__main__':
|
|
230 |
type="index", value=speakers[0])
|
231 |
tts_input3 = gr.Slider(label="Speed", value=1, minimum=0.5, maximum=2, step=0.1)
|
232 |
with gr.Accordion(label="Advanced Options", open=False):
|
233 |
-
temp_text_var = gr.
|
234 |
symbol_input = gr.Checkbox(value=False, label="Symbol input")
|
235 |
symbol_list = gr.Dataset(label="Symbol list", components=[tts_input1],
|
236 |
samples=[[x] for x in symbols],
|
@@ -239,16 +225,13 @@ if __name__ == '__main__':
|
|
239 |
tts_submit = gr.Button("Generate", variant="primary")
|
240 |
tts_output1 = gr.Textbox(label="Output Message")
|
241 |
tts_output2 = gr.Audio(label="Output Audio", elem_id=f"tts-audio{i}")
|
242 |
-
download = gr.Button("Download Audio")
|
243 |
-
download.click(None, [], [], _js=download_audio_js.format(audio_id=f"tts-audio{i}"))
|
244 |
-
|
245 |
tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, symbol_input],
|
246 |
-
[tts_output1, tts_output2])
|
247 |
symbol_input.change(to_symbol_fn,
|
248 |
[symbol_input, tts_input1, temp_text_var],
|
249 |
[tts_input1, temp_text_var])
|
250 |
symbol_list.click(None, [symbol_list, symbol_list_json], [],
|
251 |
-
|
252 |
(i,symbols) => {{
|
253 |
let root = document.querySelector("body > gradio-app");
|
254 |
if (root.shadowRoot != null)
|
@@ -284,9 +267,8 @@ if __name__ == '__main__':
|
|
284 |
vc_submit = gr.Button("Convert", variant="primary")
|
285 |
vc_output1 = gr.Textbox(label="Output Message")
|
286 |
vc_output2 = gr.Audio(label="Output Audio", elem_id=f"vc-audio{i}")
|
287 |
-
|
288 |
-
|
289 |
-
vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
|
290 |
with gr.TabItem("Soft Voice Conversion"):
|
291 |
with gr.Tabs():
|
292 |
for i, (name, author, cover_path, speakers, soft_vc_fn) in enumerate(models_soft_vc):
|
@@ -297,22 +279,12 @@ if __name__ == '__main__':
|
|
297 |
f"model author: {author}")
|
298 |
vc_input1 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
|
299 |
value=speakers[0])
|
300 |
-
|
301 |
-
with source_tabs:
|
302 |
-
with gr.TabItem("microphone"):
|
303 |
-
vc_input2 = gr.Audio(label="Input Audio (30s limitation)", source="microphone")
|
304 |
-
with gr.TabItem("upload"):
|
305 |
-
vc_input3 = gr.Audio(label="Input Audio (30s limitation)", source="upload")
|
306 |
vc_submit = gr.Button("Convert", variant="primary")
|
307 |
vc_output1 = gr.Textbox(label="Output Message")
|
308 |
vc_output2 = gr.Audio(label="Output Audio", elem_id=f"svc-audio{i}")
|
309 |
-
|
310 |
-
|
311 |
-
# clear inputs
|
312 |
-
source_tabs.set_event_trigger("select", None, [], [vc_input2, vc_input3],
|
313 |
-
js="()=>[null,null]")
|
314 |
-
vc_submit.click(soft_vc_fn, [vc_input1, vc_input2, vc_input3],
|
315 |
-
[vc_output1, vc_output2])
|
316 |
gr.Markdown(
|
317 |
"unofficial demo for \n\n"
|
318 |
"- [https://github.com/CjangCjengh/MoeGoe](https://github.com/CjangCjengh/MoeGoe)\n"
|
@@ -320,4 +292,4 @@ if __name__ == '__main__':
|
|
320 |
"- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)\n"
|
321 |
"- [https://github.com/Francis-Komizu/Sovits](https://github.com/Francis-Komizu/Sovits)"
|
322 |
)
|
323 |
-
app.
|
|
|
8 |
import librosa
|
9 |
import numpy as np
|
10 |
import torch
|
11 |
+
from gradio import FileData
|
12 |
from torch import no_grad, LongTensor
|
13 |
import commons
|
14 |
import utils
|
|
|
21 |
|
22 |
limitation = os.getenv("SYSTEM") == "spaces" # limit text and audio length in huggingface spaces
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def get_text(text, hps, is_symbol):
|
26 |
text_norm = text_to_sequence(text, hps.symbols, [] if is_symbol else hps.data.text_cleaners)
|
|
|
88 |
|
89 |
|
90 |
def create_soft_vc_fn(model, hps, speaker_ids):
|
91 |
+
def soft_vc_fn(target_speaker, input_audio):
|
|
|
|
|
|
|
92 |
if input_audio is None:
|
93 |
return "You need to upload an audio", None
|
94 |
sampling_rate, audio = input_audio
|
|
|
192 |
|
193 |
with app:
|
194 |
gr.Markdown("# Moe TTS And Voice Conversion Using VITS Model\n\n"
|
195 |
+
"![visitor badge](https://api.visitorbadge.io/api/visitors?path=skytnt.moe-tts&countColor=%23263759&style=flat&labelStyle=lower)\n\n"
|
196 |
"[Open In Colab]"
|
197 |
"(https://colab.research.google.com/drive/14Pb8lpmwZL-JI5Ub6jpG4sz2-8KS0kbS?usp=sharing)"
|
198 |
" without queue and length limitation.\n\n"
|
|
|
216 |
type="index", value=speakers[0])
|
217 |
tts_input3 = gr.Slider(label="Speed", value=1, minimum=0.5, maximum=2, step=0.1)
|
218 |
with gr.Accordion(label="Advanced Options", open=False):
|
219 |
+
temp_text_var = gr.State()
|
220 |
symbol_input = gr.Checkbox(value=False, label="Symbol input")
|
221 |
symbol_list = gr.Dataset(label="Symbol list", components=[tts_input1],
|
222 |
samples=[[x] for x in symbols],
|
|
|
225 |
tts_submit = gr.Button("Generate", variant="primary")
|
226 |
tts_output1 = gr.Textbox(label="Output Message")
|
227 |
tts_output2 = gr.Audio(label="Output Audio", elem_id=f"tts-audio{i}")
|
|
|
|
|
|
|
228 |
tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, symbol_input],
|
229 |
+
[tts_output1, tts_output2], concurrency_limit=3)
|
230 |
symbol_input.change(to_symbol_fn,
|
231 |
[symbol_input, tts_input1, temp_text_var],
|
232 |
[tts_input1, temp_text_var])
|
233 |
symbol_list.click(None, [symbol_list, symbol_list_json], [],
|
234 |
+
js=f"""
|
235 |
(i,symbols) => {{
|
236 |
let root = document.querySelector("body > gradio-app");
|
237 |
if (root.shadowRoot != null)
|
|
|
267 |
vc_submit = gr.Button("Convert", variant="primary")
|
268 |
vc_output1 = gr.Textbox(label="Output Message")
|
269 |
vc_output2 = gr.Audio(label="Output Audio", elem_id=f"vc-audio{i}")
|
270 |
+
vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2],
|
271 |
+
concurrency_limit=3)
|
|
|
272 |
with gr.TabItem("Soft Voice Conversion"):
|
273 |
with gr.Tabs():
|
274 |
for i, (name, author, cover_path, speakers, soft_vc_fn) in enumerate(models_soft_vc):
|
|
|
279 |
f"model author: {author}")
|
280 |
vc_input1 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
|
281 |
value=speakers[0])
|
282 |
+
vc_input2 = gr.Audio(label="Input Audio (30s limitation)")
|
|
|
|
|
|
|
|
|
|
|
283 |
vc_submit = gr.Button("Convert", variant="primary")
|
284 |
vc_output1 = gr.Textbox(label="Output Message")
|
285 |
vc_output2 = gr.Audio(label="Output Audio", elem_id=f"svc-audio{i}")
|
286 |
+
vc_submit.click(soft_vc_fn, [vc_input1, vc_input2],
|
287 |
+
[vc_output1, vc_output2], concurrency_limit=3)
|
|
|
|
|
|
|
|
|
|
|
288 |
gr.Markdown(
|
289 |
"unofficial demo for \n\n"
|
290 |
"- [https://github.com/CjangCjengh/MoeGoe](https://github.com/CjangCjengh/MoeGoe)\n"
|
|
|
292 |
"- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)\n"
|
293 |
"- [https://github.com/Francis-Komizu/Sovits](https://github.com/Francis-Komizu/Sovits)"
|
294 |
)
|
295 |
+
app.launch(show_api=False, share=args.share, allowed_paths=["./saved_model"])
|