Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- Data/models/G_94000.pth +3 -0
- app.py +91 -20
- config.yml +2 -2
Data/models/G_94000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa25ff864873ca90fbc2997a24fe4eb2778ec1c13c25bcfef573dc4900147cf7
|
3 |
+
size 718770997
|
app.py
CHANGED
@@ -19,7 +19,6 @@ import torch
|
|
19 |
import utils
|
20 |
from infer import infer, latest_version, get_net_g, infer_multilang
|
21 |
import gradio as gr
|
22 |
-
import webbrowser
|
23 |
import numpy as np
|
24 |
from config import config
|
25 |
from tools.translate import translate
|
@@ -125,9 +124,9 @@ def tts_split(
|
|
125 |
cut_by_sent,
|
126 |
interval_between_para,
|
127 |
interval_between_sent,
|
128 |
-
reference_audio,
|
129 |
emotion,
|
130 |
):
|
|
|
131 |
if language == "mix":
|
132 |
return ("invalid", None)
|
133 |
while text.find("\n\n") != -1:
|
@@ -207,9 +206,9 @@ def tts_fn(
|
|
207 |
noise_scale_w,
|
208 |
length_scale,
|
209 |
language,
|
210 |
-
reference_audio,
|
211 |
emotion,
|
212 |
):
|
|
|
213 |
audio_list = []
|
214 |
if language == "mix":
|
215 |
bool_valid, str_valid = re_matching.validate_text(text)
|
@@ -370,6 +369,7 @@ if __name__ == "__main__":
|
|
370 |
text = gr.TextArea(
|
371 |
label="输入文本内容",
|
372 |
placeholder="""
|
|
|
373 |
如果你选择语言为\'mix\',必须按照格式输入,否则报错:
|
374 |
格式举例(zh是中文,jp是日语,不区分大小写;说话人举例:gongzi):
|
375 |
[说话人1]<zh>你好,こんにちは! <jp>こんにちは,世界。
|
@@ -379,7 +379,6 @@ if __name__ == "__main__":
|
|
379 |
另外,所有的语言选项都可以用'|'分割长段实现分句生成。
|
380 |
""",
|
381 |
)
|
382 |
-
trans = gr.Button("中翻日", variant="primary")
|
383 |
slicer = gr.Button("快速切分", variant="primary")
|
384 |
speaker = gr.Dropdown(
|
385 |
choices=speakers, value=speakers[0], label="Speaker"
|
@@ -400,7 +399,7 @@ if __name__ == "__main__":
|
|
400 |
minimum=0.1, maximum=2, value=1.0, step=0.1, label="Length"
|
401 |
)
|
402 |
language = gr.Dropdown(
|
403 |
-
choices=languages, value=languages[
|
404 |
)
|
405 |
btn = gr.Button("生成音频!", variant="primary")
|
406 |
with gr.Column():
|
@@ -433,8 +432,93 @@ if __name__ == "__main__":
|
|
433 |
# show_download_button=False,
|
434 |
# value=os.path.abspath("./img/参数说明.png"),
|
435 |
# )
|
436 |
-
|
437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
btn.click(
|
439 |
tts_fn,
|
440 |
inputs=[
|
@@ -445,17 +529,11 @@ if __name__ == "__main__":
|
|
445 |
noise_scale_w,
|
446 |
length_scale,
|
447 |
language,
|
448 |
-
reference_audio,
|
449 |
emotion,
|
450 |
],
|
451 |
outputs=[text_output, audio_output],
|
452 |
)
|
453 |
|
454 |
-
trans.click(
|
455 |
-
translate,
|
456 |
-
inputs=[text],
|
457 |
-
outputs=[text],
|
458 |
-
)
|
459 |
slicer.click(
|
460 |
tts_split,
|
461 |
inputs=[
|
@@ -469,17 +547,10 @@ if __name__ == "__main__":
|
|
469 |
opt_cut_by_sent,
|
470 |
interval_between_para,
|
471 |
interval_between_sent,
|
472 |
-
reference_audio,
|
473 |
emotion,
|
474 |
],
|
475 |
outputs=[text_output, audio_output],
|
476 |
)
|
477 |
|
478 |
-
reference_audio.upload(
|
479 |
-
lambda x: librosa.load(x, 16000)[::-1],
|
480 |
-
inputs=[reference_audio],
|
481 |
-
outputs=[reference_audio],
|
482 |
-
)
|
483 |
print("推理页面已开启!")
|
484 |
-
webbrowser.open(f"http://127.0.0.1:{config.webui_config.port}")
|
485 |
app.launch(share=config.webui_config.share, server_port=config.webui_config.port)
|
|
|
19 |
import utils
|
20 |
from infer import infer, latest_version, get_net_g, infer_multilang
|
21 |
import gradio as gr
|
|
|
22 |
import numpy as np
|
23 |
from config import config
|
24 |
from tools.translate import translate
|
|
|
124 |
cut_by_sent,
|
125 |
interval_between_para,
|
126 |
interval_between_sent,
|
|
|
127 |
emotion,
|
128 |
):
|
129 |
+
reference_audio = None
|
130 |
if language == "mix":
|
131 |
return ("invalid", None)
|
132 |
while text.find("\n\n") != -1:
|
|
|
206 |
noise_scale_w,
|
207 |
length_scale,
|
208 |
language,
|
|
|
209 |
emotion,
|
210 |
):
|
211 |
+
reference_audio = None
|
212 |
audio_list = []
|
213 |
if language == "mix":
|
214 |
bool_valid, str_valid = re_matching.validate_text(text)
|
|
|
369 |
text = gr.TextArea(
|
370 |
label="输入文本内容",
|
371 |
placeholder="""
|
372 |
+
目前只支持日语!!
|
373 |
如果你选择语言为\'mix\',必须按照格式输入,否则报错:
|
374 |
格式举例(zh是中文,jp是日语,不区分大小写;说话人举例:gongzi):
|
375 |
[说话人1]<zh>你好,こんにちは! <jp>こんにちは,世界。
|
|
|
379 |
另外,所有的语言选项都可以用'|'分割长段实现分句生成。
|
380 |
""",
|
381 |
)
|
|
|
382 |
slicer = gr.Button("快速切分", variant="primary")
|
383 |
speaker = gr.Dropdown(
|
384 |
choices=speakers, value=speakers[0], label="Speaker"
|
|
|
399 |
minimum=0.1, maximum=2, value=1.0, step=0.1, label="Length"
|
400 |
)
|
401 |
language = gr.Dropdown(
|
402 |
+
choices=languages, value=languages[1], label="Language"
|
403 |
)
|
404 |
btn = gr.Button("生成音频!", variant="primary")
|
405 |
with gr.Column():
|
|
|
432 |
# show_download_button=False,
|
433 |
# value=os.path.abspath("./img/参数说明.png"),
|
434 |
# )
|
435 |
+
|
436 |
+
gr.Examples(
|
437 |
+
examples=[
|
438 |
+
[
|
439 |
+
"誕生日おめでとうございます",
|
440 |
+
"春風",
|
441 |
+
0.2,
|
442 |
+
0.6,
|
443 |
+
0.8,
|
444 |
+
1.0,
|
445 |
+
"JP",
|
446 |
+
False,
|
447 |
+
1,
|
448 |
+
0.2,
|
449 |
+
0,
|
450 |
+
],
|
451 |
+
[
|
452 |
+
"まさか本当に恋人になれるなんて……",
|
453 |
+
"春風",
|
454 |
+
0.2,
|
455 |
+
0.6,
|
456 |
+
0.8,
|
457 |
+
1.0,
|
458 |
+
"JP",
|
459 |
+
False,
|
460 |
+
1,
|
461 |
+
0.2,
|
462 |
+
0,
|
463 |
+
],
|
464 |
+
[
|
465 |
+
"痕跡探しに活用するかどうかも、改めて検討しましょう。 どうせ探し出せたところで、 今の私たちでは彼らに対抗できない",
|
466 |
+
"希亜",
|
467 |
+
0.2,
|
468 |
+
0.6,
|
469 |
+
0.8,
|
470 |
+
1.0,
|
471 |
+
"JP",
|
472 |
+
False,
|
473 |
+
1,
|
474 |
+
0.2,
|
475 |
+
0,
|
476 |
+
],
|
477 |
+
[
|
478 |
+
"じゃあさじゃあさ、 にぃにが先輩のどこに魅力を感じているか 教えてください",
|
479 |
+
"天",
|
480 |
+
0.2,
|
481 |
+
0.6,
|
482 |
+
0.8,
|
483 |
+
1.0,
|
484 |
+
"JP",
|
485 |
+
False,
|
486 |
+
1,
|
487 |
+
0.2,
|
488 |
+
0,
|
489 |
+
],
|
490 |
+
[
|
491 |
+
"ご、ごめんね、なんとかしたいって気持ちはあるけれど、 そこまでの覚悟はなくて……",
|
492 |
+
"都",
|
493 |
+
0.2,
|
494 |
+
0.6,
|
495 |
+
0.8,
|
496 |
+
1.0,
|
497 |
+
"JP",
|
498 |
+
False,
|
499 |
+
1,
|
500 |
+
0.2,
|
501 |
+
0,
|
502 |
+
],
|
503 |
+
],
|
504 |
+
inputs=[
|
505 |
+
text,
|
506 |
+
speaker,
|
507 |
+
sdp_ratio,
|
508 |
+
noise_scale,
|
509 |
+
noise_scale_w,
|
510 |
+
length_scale,
|
511 |
+
language,
|
512 |
+
opt_cut_by_sent,
|
513 |
+
interval_between_para,
|
514 |
+
interval_between_sent,
|
515 |
+
emotion,
|
516 |
+
],
|
517 |
+
outputs=[text_output, audio_output],
|
518 |
+
fn=tts_split,
|
519 |
+
cache_examples=True,
|
520 |
+
)
|
521 |
+
|
522 |
btn.click(
|
523 |
tts_fn,
|
524 |
inputs=[
|
|
|
529 |
noise_scale_w,
|
530 |
length_scale,
|
531 |
language,
|
|
|
532 |
emotion,
|
533 |
],
|
534 |
outputs=[text_output, audio_output],
|
535 |
)
|
536 |
|
|
|
|
|
|
|
|
|
|
|
537 |
slicer.click(
|
538 |
tts_split,
|
539 |
inputs=[
|
|
|
547 |
opt_cut_by_sent,
|
548 |
interval_between_para,
|
549 |
interval_between_sent,
|
|
|
550 |
emotion,
|
551 |
],
|
552 |
outputs=[text_output, audio_output],
|
553 |
)
|
554 |
|
|
|
|
|
|
|
|
|
|
|
555 |
print("推理页面已开启!")
|
|
|
556 |
app.launch(share=config.webui_config.share, server_port=config.webui_config.port)
|
config.yml
CHANGED
@@ -98,9 +98,9 @@ train_ms:
|
|
98 |
# 注意, “:” 后需要加空格
|
99 |
webui:
|
100 |
# 推理设备
|
101 |
-
device: "
|
102 |
# 模型路径
|
103 |
-
model: "models/
|
104 |
# 配置文件路径
|
105 |
config_path: "configs/haruka.json"
|
106 |
# 端口号
|
|
|
98 |
# 注意, “:” 后需要加空格
|
99 |
webui:
|
100 |
# 推理设备
|
101 |
+
device: "cpu"
|
102 |
# 模型路径
|
103 |
+
model: "models/G_94000.pth"
|
104 |
# 配置文件路径
|
105 |
config_path: "configs/haruka.json"
|
106 |
# 端口号
|