Spaces:
Runtime error
Runtime error
fcyai
committed on
Commit
·
5e720ce
1
Parent(s):
7f57608
Reorder tabs in Gradio interface
Browse files- webui_mix.py +498 -495
webui_mix.py
CHANGED
@@ -445,384 +445,103 @@ with gr.Blocks() as demo:
|
|
445 |
# </div>
|
446 |
# """)
|
447 |
gr.Markdown("# Deployed by [chattts.dev](https://chattts.dev?refer=hf-story-telling)")
|
448 |
-
with gr.Tab("音色抽卡"):
|
449 |
-
with gr.Row():
|
450 |
-
with gr.Column(scale=1):
|
451 |
-
texts = [
|
452 |
-
"四川美食确实以辣闻名,但也有不辣的选择。比如甜水面、赖汤圆、蛋烘糕、叶儿粑等,这些小吃口味温和,甜而不腻,也很受欢迎。",
|
453 |
-
"我是一个充满活力的人,喜欢运动,喜欢旅行,喜欢尝试新鲜事物。我喜欢挑战自己,不断突破自己的极限,让自己变得更加强大。",
|
454 |
-
"罗森宣布将于7月24日退市,在华门店超6000家!",
|
455 |
-
]
|
456 |
-
# gr.Markdown("### 随机音色抽卡")
|
457 |
-
# gr.Markdown("""
|
458 |
-
# 免抽卡,直接找稳定音色👇
|
459 |
-
|
460 |
-
# [ModelScope ChatTTS Speaker(国内)](https://modelscope.cn/studios/ttwwwaa/ChatTTS_Speaker) | [HuggingFace ChatTTS Speaker(国外)](https://huggingface.co/spaces/taa/ChatTTS_Speaker)
|
461 |
-
|
462 |
-
# 在相同的 seed 和 温度等参数下,音色具有一定的一致性。点击下面的“随机音色生成”按钮将生成多个 seed。找到满意的音色后,点击音频下方“保存”按钮。
|
463 |
-
# **注意:不同机器使用相同种子生成的音频音色可能不同,同一机器使用相同种子多次生成的音频音色也可能变化。**
|
464 |
-
# """)
|
465 |
-
input_text = gr.Textbox(label="测试文本",
|
466 |
-
info="**每行文本**都会生成一段音频,最终输出的音频是将这些音频段合成后的结果。建议使用**多行文本**进行测试,以确保音色稳定性。",
|
467 |
-
lines=4, placeholder="请输入文本...", value='\n'.join(texts))
|
468 |
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
473 |
|
474 |
-
# 保存的种子
|
475 |
-
gr.Markdown("### 种子管理界面")
|
476 |
-
seed_list = gr.DataFrame(
|
477 |
-
label="种子列表",
|
478 |
-
headers=["Index", "Seed", "Name", "Path"],
|
479 |
-
datatype=["number", "number", "str", "str"],
|
480 |
-
interactive=True,
|
481 |
-
col_count=(4, "fixed"),
|
482 |
-
value=display_seeds
|
483 |
-
)
|
484 |
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
|
|
|
|
490 |
|
491 |
-
with gr.Row():
|
492 |
-
# 添加已保存的种子音频播放组件
|
493 |
-
audio_player = gr.Audio(label="播放已保存种子音频", visible=False)
|
494 |
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
|
502 |
-
with gr.Column(scale=1):
|
503 |
-
audio_components = []
|
504 |
-
for i in range(max_audio_components):
|
505 |
-
visible = i < num_seeds_default
|
506 |
-
a = gr.Audio(f"Audio {i}", visible=visible)
|
507 |
-
t = gr.Button(f"Seed", visible=visible)
|
508 |
-
s = gr.State(value=None)
|
509 |
-
t.click(do_save_seed, inputs=[t, s], outputs=None).success(display_seeds, outputs=seed_list)
|
510 |
-
audio_components.append(a)
|
511 |
-
audio_components.append(t)
|
512 |
-
audio_components.append(s)
|
513 |
|
514 |
-
|
515 |
-
|
516 |
-
# audio = gr.Audio(label="Output Audio")
|
517 |
|
518 |
-
generate_button.click(
|
519 |
-
audio_interface_empty,
|
520 |
-
inputs=[num_seeds, input_text],
|
521 |
-
outputs=audio_components
|
522 |
-
).success(audio_interface, inputs=[num_seeds, input_text], outputs=audio_components)
|
523 |
-
with gr.Tab("长音频生成"):
|
524 |
-
with gr.Row():
|
525 |
-
with gr.Column():
|
526 |
-
gr.Markdown("### 文本")
|
527 |
-
# gr.Markdown("请上传要转换的文本文件(.txt 格式)。")
|
528 |
-
# text_file_input = gr.File(label="文本文件", file_types=[".txt"])
|
529 |
-
default_text = "四川美食确实以辣闻名,但也有不辣的选择。比如甜水面、赖汤圆、蛋烘糕、叶儿粑等,这些小吃口味温和,甜而不腻,也很受欢迎。"
|
530 |
-
text_file_input = gr.Textbox(label=f"朗读文本(字数: {len(default_text)})", lines=4,
|
531 |
-
placeholder="Please Input Text...", value=default_text)
|
532 |
-
# 当文本框内容发生变化时调用 update_label 函数
|
533 |
-
text_file_input.change(update_label, inputs=text_file_input, outputs=text_file_input)
|
534 |
-
# 加入停顿按钮
|
535 |
-
with gr.Row():
|
536 |
-
break_button = gr.Button("+停顿", variant="secondary")
|
537 |
-
laugh_button = gr.Button("+笑声", variant="secondary")
|
538 |
-
refine_button = gr.Button("Refine Text(预处理 加入停顿词、笑声等)", variant="secondary")
|
539 |
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
generate_audio_seed = gr.Button("\U0001F3B2")
|
553 |
-
tab_roleid = gr.Tab(label="内置音色")
|
554 |
-
with tab_roleid:
|
555 |
-
roleid_input = gr.Dropdown(label="内置音色",
|
556 |
-
choices=[("发姐", "1"),
|
557 |
-
("纯情男大学生", "2"),
|
558 |
-
("阳光开朗大男孩", "3"),
|
559 |
-
("知心小姐姐", "4"),
|
560 |
-
("电视台女主持", "5"),
|
561 |
-
("魅力大叔", "6"),
|
562 |
-
("优雅甜美", "7"),
|
563 |
-
("贴心男宝2", "21"),
|
564 |
-
("正式打工人", "8"),
|
565 |
-
("贴心男宝1", "9")],
|
566 |
-
value="1",
|
567 |
-
info="选择音色后会覆盖种子。感谢 @QuantumDriver 提供音色")
|
568 |
-
tab_pt = gr.Tab(label="上传.PT文件")
|
569 |
-
with tab_pt:
|
570 |
-
pt_input = gr.File(label="上传音色文件", file_types=[".pt"], height=100)
|
571 |
|
572 |
-
with gr.Row():
|
573 |
-
style_select = gr.Radio(label="预设参数", info="语速部分可自行更改",
|
574 |
-
choices=["小说朗读", "对话", "中英混合", "默认"], value="默认",
|
575 |
-
interactive=True, )
|
576 |
-
with gr.Row():
|
577 |
-
# refine
|
578 |
-
refine_text_input = gr.Checkbox(label="Refine",
|
579 |
-
info="打开后会自动根据下方参数添加笑声/停顿等。关闭后可自行添加 [uv_break] [laugh] 或者点击下方 Refin按钮先行转换",
|
580 |
-
value=True)
|
581 |
-
speed_input = gr.Slider(label="语速", minimum=1, maximum=10, value=DEFAULT_SPEED, step=1)
|
582 |
-
with gr.Row():
|
583 |
-
oral_input = gr.Slider(label="口语化", minimum=0, maximum=9, value=DEFAULT_ORAL, step=1)
|
584 |
-
laugh_input = gr.Slider(label="笑声", minimum=0, maximum=2, value=DEFAULT_LAUGH, step=1)
|
585 |
-
bk_input = gr.Slider(label="停顿", minimum=0, maximum=7, value=DEFAULT_BK, step=1)
|
586 |
-
# gr.Markdown("### 文本参数")
|
587 |
-
with gr.Row():
|
588 |
-
min_length_input = gr.Number(label="文本分段长度", info="大于这个数值进行分段",
|
589 |
-
value=DEFAULT_SEG_LENGTH, precision=0)
|
590 |
-
batch_size_input = gr.Number(label="批大小", info="越高越快 太高爆显存 4G推荐3 其他酌情",
|
591 |
-
value=DEFAULT_BATCH_SIZE, precision=0)
|
592 |
-
with gr.Accordion("其他参数", open=False):
|
593 |
-
with gr.Row():
|
594 |
-
# 温度 top_P top_K
|
595 |
-
temperature_input = gr.Slider(label="温度", minimum=0.01, maximum=1.0, step=0.01,
|
596 |
-
value=DEFAULT_TEMPERATURE)
|
597 |
-
top_P_input = gr.Slider(label="top_P", minimum=0.1, maximum=0.9, step=0.05, value=DEFAULT_TOP_P)
|
598 |
-
top_K_input = gr.Slider(label="top_K", minimum=1, maximum=20, step=1, value=DEFAULT_TOP_K)
|
599 |
-
# reset 按钮
|
600 |
-
reset_button = gr.Button("重置")
|
601 |
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
stream_select = gr.Radio(label="流输出方式",
|
608 |
-
info="真流式为实验功能,播放效果:卡播卡播卡播(⏳🎵⏳🎵⏳🎵);伪流式为分段推理后输出,播放效果:卡卡卡播播播播(⏳⏳🎵🎵🎵🎵)。伪流式批次建议4以上减少卡顿",
|
609 |
-
choices=[("真", "real"), ("伪", "fake")], value="fake", interactive=True, )
|
610 |
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
autoplay=True,
|
616 |
-
# disable auto play for Windows, due to https://developer.chrome.com/blog/autoplay#webaudio
|
617 |
-
interactive=False,
|
618 |
-
show_label=True)
|
619 |
|
620 |
-
|
621 |
-
|
622 |
-
|
|
|
|
|
|
|
623 |
|
|
|
624 |
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
631 |
}
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
def do_style_select(x):
|
641 |
-
if x == "小说朗读":
|
642 |
-
return [4, 0, 0, 2]
|
643 |
-
elif x == "对话":
|
644 |
-
return [5, 5, 1, 4]
|
645 |
-
elif x == "中英混合":
|
646 |
-
return [4, 1, 0, 3]
|
647 |
-
else:
|
648 |
-
return [DEFAULT_SPEED, DEFAULT_ORAL, DEFAULT_LAUGH, DEFAULT_BK]
|
649 |
-
|
650 |
-
|
651 |
-
# style_select 选择
|
652 |
-
style_select.change(
|
653 |
-
do_style_select,
|
654 |
-
inputs=style_select,
|
655 |
-
outputs=[speed_input, oral_input, laugh_input, bk_input]
|
656 |
-
)
|
657 |
-
|
658 |
-
# refine 按钮
|
659 |
-
refine_button.click(
|
660 |
-
generate_refine,
|
661 |
-
inputs=[text_file_input, oral_input, laugh_input, bk_input, temperature_input, top_P_input, top_K_input],
|
662 |
-
outputs=text_file_input
|
663 |
-
)
|
664 |
-
# 重置按钮 重置温度等参数
|
665 |
-
reset_button.click(
|
666 |
-
lambda: [0.3, 0.7, 20],
|
667 |
-
inputs=None,
|
668 |
-
outputs=[temperature_input, top_P_input, top_K_input]
|
669 |
-
)
|
670 |
-
|
671 |
-
generate_button.click(
|
672 |
-
fn=generate_tts_audio,
|
673 |
-
inputs=[
|
674 |
-
text_file_input,
|
675 |
-
num_seeds_input,
|
676 |
-
seed_input,
|
677 |
-
speed_input,
|
678 |
-
oral_input,
|
679 |
-
laugh_input,
|
680 |
-
bk_input,
|
681 |
-
min_length_input,
|
682 |
-
batch_size_input,
|
683 |
-
temperature_input,
|
684 |
-
top_P_input,
|
685 |
-
top_K_input,
|
686 |
-
roleid_input,
|
687 |
-
refine_text_input,
|
688 |
-
speaker_stat,
|
689 |
-
pt_input
|
690 |
-
],
|
691 |
-
outputs=[output_audio]
|
692 |
-
)
|
693 |
-
|
694 |
-
generate_button_stream.click(
|
695 |
-
fn=generate_tts_audio_stream,
|
696 |
-
inputs=[
|
697 |
-
text_file_input,
|
698 |
-
num_seeds_input,
|
699 |
-
seed_input,
|
700 |
-
speed_input,
|
701 |
-
oral_input,
|
702 |
-
laugh_input,
|
703 |
-
bk_input,
|
704 |
-
min_length_input,
|
705 |
-
batch_size_input,
|
706 |
-
temperature_input,
|
707 |
-
top_P_input,
|
708 |
-
top_K_input,
|
709 |
-
roleid_input,
|
710 |
-
refine_text_input,
|
711 |
-
speaker_stat,
|
712 |
-
pt_input,
|
713 |
-
stream_select
|
714 |
-
],
|
715 |
-
outputs=[output_audio_stream]
|
716 |
-
)
|
717 |
-
|
718 |
-
break_button.click(
|
719 |
-
inser_token,
|
720 |
-
inputs=[text_file_input, break_button],
|
721 |
-
outputs=text_file_input
|
722 |
-
)
|
723 |
-
|
724 |
-
laugh_button.click(
|
725 |
-
inser_token,
|
726 |
-
inputs=[text_file_input, laugh_button],
|
727 |
-
outputs=text_file_input
|
728 |
-
)
|
729 |
-
|
730 |
-
with gr.Tab("角色扮演"):
|
731 |
-
def txt_2_script(text):
|
732 |
-
lines = text.split("\n")
|
733 |
-
data = []
|
734 |
-
for line in lines:
|
735 |
-
if not line.strip():
|
736 |
-
continue
|
737 |
-
parts = line.split("::")
|
738 |
-
if len(parts) != 2:
|
739 |
-
continue
|
740 |
-
data.append({
|
741 |
-
"character": parts[0],
|
742 |
-
"txt": parts[1]
|
743 |
-
})
|
744 |
-
return data
|
745 |
-
|
746 |
-
|
747 |
-
def script_2_txt(data):
|
748 |
-
assert isinstance(data, list)
|
749 |
-
result = []
|
750 |
-
for item in data:
|
751 |
-
txt = item['txt'].replace('\n', ' ')
|
752 |
-
result.append(f"{item['character']}::{txt}")
|
753 |
-
return "\n".join(result)
|
754 |
-
|
755 |
-
|
756 |
-
def get_characters(lines):
|
757 |
-
assert isinstance(lines, list)
|
758 |
-
characters = list([_["character"] for _ in lines])
|
759 |
-
unique_characters = list(dict.fromkeys(characters))
|
760 |
-
print([[character, 0] for character in unique_characters])
|
761 |
-
return [[character, 0, 5, 2, 0, 4] for character in unique_characters]
|
762 |
-
|
763 |
-
|
764 |
-
def get_txt_characters(text):
|
765 |
-
return get_characters(txt_2_script(text))
|
766 |
-
|
767 |
-
|
768 |
-
def llm_change(model):
|
769 |
-
llm_setting = {
|
770 |
-
"gpt-3.5-turbo-0125": ["https://api.openai.com/v1"],
|
771 |
-
"gpt-4o": ["https://api.openai.com/v1"],
|
772 |
-
"deepseek-chat": ["https://api.deepseek.com"],
|
773 |
-
"yi-large": ["https://api.lingyiwanwu.com/v1"]
|
774 |
-
}
|
775 |
-
if model in llm_setting:
|
776 |
-
return llm_setting[model][0]
|
777 |
-
else:
|
778 |
-
gr.Error("Model not found.")
|
779 |
-
return None
|
780 |
-
|
781 |
-
|
782 |
-
def ai_script_generate(model, api_base, api_key, text, progress=gr.Progress(track_tqdm=True)):
|
783 |
-
from llm_utils import llm_operation
|
784 |
-
from config import LLM_PROMPT
|
785 |
-
scripts = llm_operation(api_base, api_key, model, LLM_PROMPT, text, required_keys=["txt", "character"])
|
786 |
-
return script_2_txt(scripts)
|
787 |
-
|
788 |
-
@spaces.GPU(duration=120)
|
789 |
-
def generate_script_audio(text, models_seeds, progress=gr.Progress()):
|
790 |
-
scripts = txt_2_script(text) # 将文本转换为剧本
|
791 |
-
characters = get_characters(scripts) # 从剧本中提取角色
|
792 |
-
|
793 |
-
#
|
794 |
-
import pandas as pd
|
795 |
-
from collections import defaultdict
|
796 |
-
import itertools
|
797 |
-
from tts_model import generate_audio_for_seed
|
798 |
-
from utils import combine_audio, save_audio, normalize_zh
|
799 |
-
|
800 |
-
assert isinstance(models_seeds, pd.DataFrame)
|
801 |
-
|
802 |
-
# 批次处理函数
|
803 |
-
def batch(iterable, batch_size):
|
804 |
-
it = iter(iterable)
|
805 |
-
while True:
|
806 |
-
batch = list(itertools.islice(it, batch_size))
|
807 |
-
if not batch:
|
808 |
-
break
|
809 |
-
yield batch
|
810 |
-
print('1')
|
811 |
-
column_mapping = {
|
812 |
-
'角色': 'character',
|
813 |
-
'种子': 'seed',
|
814 |
-
'语速': 'speed',
|
815 |
-
'口语': 'oral',
|
816 |
-
'笑声': 'laugh',
|
817 |
-
'停顿': 'break'
|
818 |
-
}
|
819 |
-
# 使用 rename 方法重命名 DataFrame 的列
|
820 |
-
models_seeds = models_seeds.rename(columns=column_mapping).to_dict(orient='records')
|
821 |
-
# models_seeds = models_seeds.to_dict(orient='records')
|
822 |
-
print('2')
|
823 |
-
# 检查每个角色是否都有对应的种子
|
824 |
-
print(models_seeds)
|
825 |
-
seed_lookup = {seed['character']: seed for seed in models_seeds}
|
826 |
|
827 |
character_seeds = {}
|
828 |
missing_seeds = []
|
@@ -886,154 +605,438 @@ with gr.Blocks() as demo:
|
|
886 |
return save_audio(fname, audio)
|
887 |
|
888 |
|
889 |
-
script_example = {
|
890 |
-
"lines": [{
|
891 |
-
"txt": "在一个风和日丽的下午,小红帽准备去森林里看望她的奶奶。",
|
892 |
-
"character": "旁白"
|
893 |
-
}, {
|
894 |
-
"txt": "小红帽说",
|
895 |
-
"character": "旁白"
|
896 |
-
}, {
|
897 |
-
"txt": "我要给奶奶带点好吃的。",
|
898 |
-
"character": "年轻女性"
|
899 |
-
}, {
|
900 |
-
"txt": "在森林里,小红帽遇到了狡猾的大灰狼。",
|
901 |
-
"character": "旁白"
|
902 |
-
}, {
|
903 |
-
"txt": "大灰狼说",
|
904 |
-
"character": "旁白"
|
905 |
-
}, {
|
906 |
-
"txt": "小红帽,你的篮子里装的是什么?",
|
907 |
-
"character": "中年男性"
|
908 |
-
}, {
|
909 |
-
"txt": "小红帽回答",
|
910 |
-
"character": "旁白"
|
911 |
-
}, {
|
912 |
-
"txt": "这是给奶奶的蛋糕和果酱。",
|
913 |
-
"character": "年轻女性"
|
914 |
-
}, {
|
915 |
-
"txt": "大灰狼心生一计,决定先到奶奶家等待小红帽。",
|
916 |
-
"character": "旁白"
|
917 |
-
}, {
|
918 |
-
"txt": "当小红帽到达奶奶家时,她发现大灰狼伪装成了奶奶。",
|
919 |
-
"character": "旁白"
|
920 |
-
}, {
|
921 |
-
"txt": "小红帽疑惑的问",
|
922 |
-
"character": "旁白"
|
923 |
-
}, {
|
924 |
-
"txt": "奶奶,你的耳朵怎么这么尖?",
|
925 |
-
"character": "年轻女性"
|
926 |
-
}, {
|
927 |
-
"txt": "大灰狼慌张地回答",
|
928 |
-
"character": "旁白"
|
929 |
-
}, {
|
930 |
-
"txt": "哦,这是为了更好地听你说话。",
|
931 |
-
"character": "中年男性"
|
932 |
-
}, {
|
933 |
-
"txt": "小红帽越发觉得不对劲,最终发现了大灰狼的诡计。",
|
934 |
-
"character": "旁白"
|
935 |
-
}, {
|
936 |
-
"txt": "她大声呼救,森林里的猎人听到后赶来救了她和奶奶。",
|
937 |
-
"character": "旁白"
|
938 |
-
}, {
|
939 |
-
"txt": "从此,小红帽再也没有单独进入森林,而是和家人一起去看望奶奶。",
|
940 |
-
"character": "旁白"
|
941 |
-
}]
|
942 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
943 |
|
944 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
945 |
|
946 |
-
|
947 |
-
|
948 |
-
|
949 |
-
gr.Markdown("""
|
950 |
-
为确保生成效果稳定,仅支持与 GPT-4 相当的模型,推荐使用 4o yi-large deepseek。
|
951 |
-
如果没有反应,请检查日志中的错误信息。如果提示格式错误,请重试几次。国内模型可能会受到风控影响,建议更换文本内容后再试。
|
952 |
|
953 |
-
申请渠道(免费额度):
|
954 |
|
955 |
-
|
956 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
957 |
|
958 |
-
""")
|
959 |
-
# 申请渠道
|
960 |
|
961 |
-
|
962 |
-
|
963 |
-
|
964 |
-
value="gpt-4o", interactive=True, )
|
965 |
-
with gr.Row(equal_height=True):
|
966 |
-
openai_api_base_input = gr.Textbox(label="OpenAI API Base URL",
|
967 |
-
placeholder="请输入API Base URL",
|
968 |
-
value=r"https://api.openai.com/v1")
|
969 |
-
openai_api_key_input = gr.Textbox(label="OpenAI API Key", placeholder="请输入API Key",
|
970 |
-
value="sk-xxxxxxx", type="password")
|
971 |
-
# AI提示词
|
972 |
-
ai_text_input = gr.Textbox(label="剧情简介或者一段故事", placeholder="请输入文本...", lines=2,
|
973 |
-
value=ai_text_default)
|
974 |
|
975 |
-
# 生成脚本的按钮
|
976 |
-
ai_script_generate_button = gr.Button("AI脚本生成")
|
977 |
|
978 |
-
|
979 |
-
|
980 |
-
|
981 |
-
|
982 |
-
|
983 |
-
|
|
|
|
|
|
|
984 |
|
985 |
-
script_text_input = gr.Textbox(label="脚本格式 “角色::文本 一行为一句” 注意是::",
|
986 |
-
placeholder="请输入文本...",
|
987 |
-
lines=12, value=script_text)
|
988 |
-
script_translate_button = gr.Button("步骤①:提取角色")
|
989 |
|
990 |
-
|
991 |
-
|
992 |
-
|
993 |
-
|
994 |
-
|
995 |
-
|
996 |
-
["年轻女性", 2, 5, 2, 0, 2],
|
997 |
-
["中年男性", 2424, 5, 2, 0, 2]
|
998 |
-
]
|
999 |
|
1000 |
-
|
1001 |
-
|
1002 |
-
|
1003 |
-
|
1004 |
-
|
1005 |
-
|
1006 |
-
|
1007 |
-
|
1008 |
-
|
1009 |
-
|
1010 |
-
|
1011 |
-
|
1012 |
|
1013 |
-
|
1014 |
-
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1019 |
)
|
1020 |
-
|
1021 |
-
|
1022 |
-
|
1023 |
-
inputs=[
|
1024 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1025 |
)
|
1026 |
-
|
1027 |
-
|
1028 |
-
|
1029 |
-
inputs=[
|
1030 |
-
outputs=
|
1031 |
)
|
1032 |
-
|
1033 |
-
|
1034 |
-
|
1035 |
-
inputs=[
|
1036 |
-
outputs=
|
1037 |
)
|
1038 |
|
|
|
|
|
1039 |
demo.launch(share=args.share, inbrowser=True)
|
|
|
445 |
# </div>
|
446 |
# """)
|
447 |
gr.Markdown("# Deployed by [chattts.dev](https://chattts.dev?refer=hf-story-telling)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
|
449 |
+
with gr.Tab("角色扮演"):
|
450 |
+
def txt_2_script(text):
    """Parse ``character::text`` script lines into a list of dicts.

    Each non-blank line of *text* is expected to look like
    ``<character>::<spoken text>``. Lines without a ``::`` separator are
    ignored.

    Args:
        text: The raw script, one utterance per line.

    Returns:
        A list of ``{"character": ..., "txt": ...}`` dicts in input order.
    """
    data = []
    for line in text.split("\n"):
        if not line.strip():
            continue
        # Split on the FIRST "::" only. The previous split("::") required
        # exactly two parts, so any utterance whose text itself contained
        # "::" was silently dropped from the script.
        character, separator, txt = line.partition("::")
        if not separator:
            continue
        data.append({
            "character": character,
            "txt": txt,
        })
    return data
|
464 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
|
466 |
+
def script_2_txt(data):
    """Serialize script entries back into ``character::text`` lines.

    Newlines inside an entry's ``txt`` are replaced with spaces so that
    every entry occupies exactly one output line.

    Args:
        data: A list of dicts, each with ``"character"`` and ``"txt"`` keys.

    Returns:
        The entries joined with ``"\\n"``, one ``character::text`` per line.

    Raises:
        TypeError: If *data* is not a list.
    """
    # Explicit check instead of ``assert``: asserts are stripped under -O,
    # which would let a malformed value through to the loop below.
    if not isinstance(data, list):
        raise TypeError(f"data must be a list, got {type(data).__name__}")
    flattened = (
        (item["character"], item["txt"].replace("\n", " ")) for item in data
    )
    return "\n".join(f"{character}::{txt}" for character, txt in flattened)
|
473 |
|
|
|
|
|
|
|
474 |
|
475 |
+
def get_characters(lines):
    """Build one default settings row per distinct character in a script.

    Characters are de-duplicated while keeping first-appearance order.
    Each row is ``[character, 0, 5, 2, 0, 4]``; the row is fed to the
    six-column role table whose headers are character, seed, speed, oral,
    laugh and break.

    Args:
        lines: A list of dicts, each with a ``"character"`` key.

    Returns:
        A list of six-element rows, one per unique character.

    Raises:
        TypeError: If *lines* is not a list.
    """
    # Explicit check instead of ``assert`` (asserts vanish under -O).
    if not isinstance(lines, list):
        raise TypeError(f"lines must be a list, got {type(lines).__name__}")
    # dict.fromkeys keeps insertion order, giving an ordered de-duplication.
    unique_characters = dict.fromkeys(entry["character"] for entry in lines)
    return [[character, 0, 5, 2, 0, 4] for character in unique_characters]
|
481 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
482 |
|
483 |
+
def get_txt_characters(text):
    """Parse raw script text and return the per-character default rows."""
    script_entries = txt_2_script(text)
    return get_characters(script_entries)
|
|
|
485 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
|
487 |
+
def llm_change(model):
    """Return the API base URL associated with the selected LLM.

    Args:
        model: The model name chosen in the UI.

    Returns:
        The base URL string registered for *model*.

    Raises:
        gr.Error: If *model* has no registered base URL.
    """
    llm_setting = {
        "gpt-3.5-turbo-0125": ["https://api.openai.com/v1"],
        "gpt-4o": ["https://api.openai.com/v1"],
        "deepseek-chat": ["https://api.deepseek.com"],
        "yi-large": ["https://api.lingyiwanwu.com/v1"]
    }
    if model not in llm_setting:
        # gr.Error is an exception: it only reaches the user when raised.
        # The previous code merely constructed it and then returned None,
        # so the failure was silent and the output received None.
        raise gr.Error("Model not found.")
    return llm_setting[model][0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
500 |
|
501 |
+
def ai_script_generate(model, api_base, api_key, text, progress=gr.Progress(track_tqdm=True)):
    """Ask an LLM to turn *text* into a script and return it as script text.

    Calls ``llm_operation`` with the project's ``LLM_PROMPT`` and requires
    each returned entry to carry ``"txt"`` and ``"character"`` keys, then
    serializes the entries with ``script_2_txt``.

    ``progress`` is not referenced in the body.
    NOTE(review): presumably kept so Gradio tracks tqdm progress - confirm.
    """
    from llm_utils import llm_operation
    from config import LLM_PROMPT

    return script_2_txt(
        llm_operation(
            api_base,
            api_key,
            model,
            LLM_PROMPT,
            text,
            required_keys=["txt", "character"],
        )
    )
|
|
|
|
|
|
|
506 |
|
507 |
+
@spaces.GPU(duration)
|
508 |
+
def generate_script_audio(text, models_seeds, progress=gr.Progress()):
|
509 |
+
scripts = txt_2_script(text) # 将文本转换为剧本
|
510 |
+
characters = get_characters(scripts) # 从剧本中提取角色
|
|
|
|
|
|
|
|
|
511 |
|
512 |
+
#
|
513 |
+
import pandas as pd
|
514 |
+
from collections import defaultdict
|
515 |
+
import itertools
|
516 |
+
from tts_model import generate_audio_for_seed
|
517 |
+
from utils import combine_audio, save_audio, normalize_zh
|
518 |
|
519 |
+
assert isinstance(models_seeds, pd.DataFrame)
|
520 |
|
521 |
+
# 批次处理函数
|
522 |
+
def batch(iterable, batch_size):
|
523 |
+
it = iter(iterable)
|
524 |
+
while True:
|
525 |
+
batch = list(itertools.islice(it, batch_size))
|
526 |
+
if not batch:
|
527 |
+
break
|
528 |
+
yield batch
|
529 |
+
print('1')
|
530 |
+
column_mapping = {
|
531 |
+
'角色': 'character',
|
532 |
+
'种子': 'seed',
|
533 |
+
'语速': 'speed',
|
534 |
+
'口语': 'oral',
|
535 |
+
'笑声': 'laugh',
|
536 |
+
'停顿': 'break'
|
537 |
}
|
538 |
+
# 使用 rename 方法重命名 DataFrame 的列
|
539 |
+
models_seeds = models_seeds.rename(columns=column_mapping).to_dict(orient='records')
|
540 |
+
# models_seeds = models_seeds.to_dict(orient='records')
|
541 |
+
print('2')
|
542 |
+
# 检查每个角色是否都有对应的种子
|
543 |
+
print(models_seeds)
|
544 |
+
seed_lookup = {seed['character']: seed for seed in models_seeds}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
545 |
|
546 |
character_seeds = {}
|
547 |
missing_seeds = []
|
|
|
605 |
return save_audio(fname, audio)
|
606 |
|
607 |
|
608 |
+
script_example = {
|
609 |
+
"lines": [{
|
610 |
+
"txt": "在一个风和日丽的下午,小红帽准备去森林里看望她的奶奶。",
|
611 |
+
"character": "旁白"
|
612 |
+
}, {
|
613 |
+
"txt": "小红帽说",
|
614 |
+
"character": "旁白"
|
615 |
+
}, {
|
616 |
+
"txt": "我要给奶奶带点好吃的。",
|
617 |
+
"character": "年轻女性"
|
618 |
+
}, {
|
619 |
+
"txt": "在森林里,小红帽遇到了狡猾的大灰狼。",
|
620 |
+
"character": "旁白"
|
621 |
+
}, {
|
622 |
+
"txt": "大灰狼说",
|
623 |
+
"character": "旁白"
|
624 |
+
}, {
|
625 |
+
"txt": "小红帽,你的篮子里装的是什么?",
|
626 |
+
"character": "中年男性"
|
627 |
+
}, {
|
628 |
+
"txt": "小红帽回答",
|
629 |
+
"character": "旁白"
|
630 |
+
}, {
|
631 |
+
"txt": "这是给奶奶的蛋糕和果酱。",
|
632 |
+
"character": "年轻女性"
|
633 |
+
}, {
|
634 |
+
"txt": "大灰狼心生一计,决定先到奶奶家等待小红帽。",
|
635 |
+
"character": "旁白"
|
636 |
+
}, {
|
637 |
+
"txt": "当小红帽到达奶奶家时,她发现大灰狼伪装成了奶奶。",
|
638 |
+
"character": "旁白"
|
639 |
+
}, {
|
640 |
+
"txt": "小红帽疑惑的问",
|
641 |
+
"character": "旁白"
|
642 |
+
}, {
|
643 |
+
"txt": "奶奶,你的耳朵怎么这么尖?",
|
644 |
+
"character": "年轻女性"
|
645 |
+
}, {
|
646 |
+
"txt": "大灰狼慌张地回答",
|
647 |
+
"character": "旁白"
|
648 |
+
}, {
|
649 |
+
"txt": "哦,这是为了更好地听你说话。",
|
650 |
+
"character": "中年男性"
|
651 |
+
}, {
|
652 |
+
"txt": "小红帽越发觉得不对劲,最终发现了大灰狼的诡计。",
|
653 |
+
"character": "旁白"
|
654 |
+
}, {
|
655 |
+
"txt": "她大声呼救,森林里的猎人听到后赶来救了她和奶奶。",
|
656 |
+
"character": "旁白"
|
657 |
+
}, {
|
658 |
+
"txt": "从此,小红帽再也没有单独进入森林,而是和家人一起去看望奶奶。",
|
659 |
+
"character": "旁白"
|
660 |
+
}]
|
661 |
+
}
|
662 |
+
|
663 |
+
ai_text_default = "武侠小说《花木兰大战周树人》 要符合人物背景"
|
664 |
+
|
665 |
+
with gr.Row(equal_height=True):
|
666 |
+
with gr.Column(scale=2):
|
667 |
+
gr.Markdown("### AI脚本")
|
668 |
+
gr.Markdown("""
|
669 |
+
为确保生成效果稳定,仅支持与 GPT-4 相当的模型,推荐使用 4o yi-large deepseek。
|
670 |
+
如果没有反应,请检查日志中的错误信息。如果提示格式错误,请重试几次。国内模型可能会受到风控影响,建议更换文本内容后再试。
|
671 |
+
|
672 |
+
申请渠道(免费额度):
|
673 |
+
|
674 |
+
- [https://platform.deepseek.com/](https://platform.deepseek.com/)
|
675 |
+
- [https://platform.lingyiwanwu.com/](https://platform.lingyiwanwu.com/)
|
676 |
+
|
677 |
+
""")
|
678 |
+
# 申请渠道
|
679 |
+
|
680 |
+
with gr.Row(equal_height=True):
|
681 |
+
# 选择模型 只有 gpt4o deepseek-chat yi-large 三个选项
|
682 |
+
model_select = gr.Radio(label="选择模型", choices=["gpt-4o", "deepseek-chat", "yi-large"],
|
683 |
+
value="gpt-4o", interactive=True, )
|
684 |
+
with gr.Row(equal_height=True):
|
685 |
+
openai_api_base_input = gr.Textbox(label="OpenAI API Base URL",
|
686 |
+
placeholder="请输入API Base URL",
|
687 |
+
value=r"https://api.openai.com/v1")
|
688 |
+
openai_api_key_input = gr.Textbox(label="OpenAI API Key", placeholder="请输入API Key",
|
689 |
+
value="sk-xxxxxxx", type="password")
|
690 |
+
# AI提示词
|
691 |
+
ai_text_input = gr.Textbox(label="剧情简介或者一段故事", placeholder="请输入文本...", lines=2,
|
692 |
+
value=ai_text_default)
|
693 |
+
|
694 |
+
# 生成脚本的按钮
|
695 |
+
ai_script_generate_button = gr.Button("AI脚本生成")
|
696 |
+
|
697 |
+
with gr.Column(scale=3):
|
698 |
+
gr.Markdown("### 脚本")
|
699 |
+
gr.Markdown(
|
700 |
+
"脚本可以手工编写也可以从左侧的AI脚本生成按钮生成。脚本格式 **角色::文本** 一行为一句” 注意是::")
|
701 |
+
script_text = "\n".join(
|
702 |
+
[f"{_.get('character', '')}::{_.get('txt', '')}" for _ in script_example['lines']])
|
703 |
+
|
704 |
+
script_text_input = gr.Textbox(label="脚本格式 “角色::文本 一行为一句” 注意是::",
|
705 |
+
placeholder="请输入文本...",
|
706 |
+
lines=12, value=script_text)
|
707 |
+
script_translate_button = gr.Button("步骤①:提取角色")
|
708 |
+
|
709 |
+
with gr.Column(scale=1):
|
710 |
+
gr.Markdown("### 角色种子")
|
711 |
+
# DataFrame 来存放转换后的脚本
|
712 |
+
# 默认数据 [speed_5][oral_2][laugh_0][break_4]
|
713 |
+
default_data = [
|
714 |
+
["旁白", 2222, 3, 0, 0, 2],
|
715 |
+
["年轻女性", 2, 5, 2, 0, 2],
|
716 |
+
["中年男性", 2424, 5, 2, 0, 2]
|
717 |
+
]
|
718 |
+
|
719 |
+
script_data = gr.DataFrame(
|
720 |
+
value=default_data,
|
721 |
+
label="角色对应的音色种子,从抽卡那获取",
|
722 |
+
headers=["角色", "种子", "语速", "口语", "笑声", "停顿"],
|
723 |
+
datatype=["str", "number", "number", "number", "number", "number"],
|
724 |
+
interactive=True,
|
725 |
+
col_count=(6, "fixed"),
|
726 |
+
)
|
727 |
+
# 生视频按钮
|
728 |
+
script_generate_audio = gr.Button("步骤②:生成音频")
|
729 |
+
# 输出的脚本音频
|
730 |
+
script_audio = gr.Audio(label="AI生成的音频", interactive=False)
|
731 |
+
|
732 |
+
# 脚本相关事件
|
733 |
+
# 脚本转换
|
734 |
+
script_translate_button.click(
|
735 |
+
get_txt_characters,
|
736 |
+
inputs=[script_text_input],
|
737 |
+
outputs=script_data
|
738 |
+
)
|
739 |
+
# 处理模型切换
|
740 |
+
model_select.change(
|
741 |
+
llm_change,
|
742 |
+
inputs=[model_select],
|
743 |
+
outputs=[openai_api_base_input]
|
744 |
+
)
|
745 |
+
# AI脚本生成
|
746 |
+
ai_script_generate_button.click(
|
747 |
+
ai_script_generate,
|
748 |
+
inputs=[model_select, openai_api_base_input, openai_api_key_input, ai_text_input],
|
749 |
+
outputs=[script_text_input]
|
750 |
+
)
|
751 |
+
# 音频生成
|
752 |
+
script_generate_audio.click(
|
753 |
+
generate_script_audio,
|
754 |
+
inputs=[script_text_input, script_data],
|
755 |
+
outputs=[script_audio]
|
756 |
+
)
|
757 |
+
|
758 |
+
with gr.Tab("音色抽卡"):
|
759 |
+
with gr.Row():
|
760 |
+
with gr.Column(scale=1):
|
761 |
+
texts = [
|
762 |
+
"四川美食确实以辣闻名,但也有不辣的选择。比如甜水面、赖汤圆、蛋烘糕、叶儿粑等,这些小吃口味温和,甜而不腻,也很受欢迎。",
|
763 |
+
"我是一个充满活力的人,喜欢运动,喜欢旅行,喜欢尝试新鲜事物。我喜欢挑战自己,不断突破自己的极限,让自己变得更加强大。",
|
764 |
+
"罗森宣布将于7月24日退市,在华门店超6000家!",
|
765 |
+
]
|
766 |
+
# gr.Markdown("### 随机音色抽卡")
|
767 |
+
# gr.Markdown("""
|
768 |
+
# 免抽卡,直接找稳定音色👇
|
769 |
+
|
770 |
+
# [ModelScope ChatTTS Speaker(国内)](https://modelscope.cn/studios/ttwwwaa/ChatTTS_Speaker) | [HuggingFace ChatTTS Speaker(国外)](https://huggingface.co/spaces/taa/ChatTTS_Speaker)
|
771 |
+
|
772 |
+
# 在相同的 seed 和 温度等参数下,音色具有一定的一致性。点击下面的“随机音色生成”按钮将生成多个 seed。找到满意的音色后,点击音频下方“保存”按钮。
|
773 |
+
# **注意:不同机器使用相同种子生成的音频音色可能不同,同一机器使用相同种子多次生成的音频音色也可能变化。**
|
774 |
+
# """)
|
775 |
+
input_text = gr.Textbox(label="测试文本",
|
776 |
+
info="**每行文本**都会生成一段音频,最终输出的音频是将这些音频段合成后的结果。建议使用**多行文本**进行测试,以确保音色稳定性。",
|
777 |
+
lines=4, placeholder="请输入文本...", value='\n'.join(texts))
|
778 |
+
|
779 |
+
num_seeds = gr.Slider(minimum=1, maximum=max_audio_components, step=1, label="seed生成数量",
|
780 |
+
value=num_seeds_default)
|
781 |
+
|
782 |
+
generate_button = gr.Button("随机音色抽卡🎲", variant="primary")
|
783 |
+
|
784 |
+
# 保存的种子
|
785 |
+
gr.Markdown("### 种子管理界面")
|
786 |
+
seed_list = gr.DataFrame(
|
787 |
+
label="种子列表",
|
788 |
+
headers=["Index", "Seed", "Name", "Path"],
|
789 |
+
datatype=["number", "number", "str", "str"],
|
790 |
+
interactive=True,
|
791 |
+
col_count=(4, "fixed"),
|
792 |
+
value=display_seeds
|
793 |
+
)
|
794 |
+
|
795 |
+
with gr.Row():
|
796 |
+
refresh_button = gr.Button("刷新")
|
797 |
+
save_button = gr.Button("保存")
|
798 |
+
del_button = gr.Button("删除")
|
799 |
+
play_button = gr.Button("试听")
|
800 |
+
|
801 |
+
with gr.Row():
|
802 |
+
# 添加已保存的种子音频播放组件
|
803 |
+
audio_player = gr.Audio(label="播放已保存种子音频", visible=False)
|
804 |
+
|
805 |
+
# 绑定按钮和函数
|
806 |
+
refresh_button.click(display_seeds, outputs=seed_list)
|
807 |
+
seed_list.select(seed_change).success(seed_change_btn, outputs=[del_button, play_button])
|
808 |
+
save_button.click(do_save_seeds, inputs=[seed_list], outputs=None)
|
809 |
+
del_button.click(do_delete_seed, inputs=del_button, outputs=seed_list)
|
810 |
+
play_button.click(do_play_seed, inputs=play_button, outputs=audio_player)
|
811 |
+
|
812 |
+
with gr.Column(scale=1):
|
813 |
+
audio_components = []
|
814 |
+
for i in range(max_audio_components):
|
815 |
+
visible = i < num_seeds_default
|
816 |
+
a = gr.Audio(f"Audio {i}", visible=visible)
|
817 |
+
t = gr.Button(f"Seed", visible=visible)
|
818 |
+
s = gr.State(value=None)
|
819 |
+
t.click(do_save_seed, inputs=[t, s], outputs=None).success(display_seeds, outputs=seed_list)
|
820 |
+
audio_components.append(a)
|
821 |
+
audio_components.append(t)
|
822 |
+
audio_components.append(s)
|
823 |
+
|
824 |
+
num_seeds.change(update_audio_components, inputs=num_seeds, outputs=audio_components)
|
825 |
+
# output = gr.Column()
|
826 |
+
# audio = gr.Audio(label="Output Audio")
|
827 |
+
|
828 |
+
generate_button.click(
|
829 |
+
audio_interface_empty,
|
830 |
+
inputs=[num_seeds, input_text],
|
831 |
+
outputs=audio_components
|
832 |
+
).success(audio_interface, inputs=[num_seeds, input_text], outputs=audio_components)
|
833 |
+
with gr.Tab("长音频生成"):
|
834 |
+
with gr.Row():
|
835 |
+
with gr.Column():
|
836 |
+
gr.Markdown("### 文本")
|
837 |
+
# gr.Markdown("请上传要转换的文本文件(.txt 格式)。")
|
838 |
+
# text_file_input = gr.File(label="文本文件", file_types=[".txt"])
|
839 |
+
default_text = "四川美食确实以辣闻名,但也有不辣的选择。比如甜水面、赖汤圆、蛋烘糕、叶儿粑等,这些小吃口味温和,甜而不腻,也很受欢迎。"
|
840 |
+
text_file_input = gr.Textbox(label=f"朗读文本(字数: {len(default_text)})", lines=4,
|
841 |
+
placeholder="Please Input Text...", value=default_text)
|
842 |
+
# 当文本框内容发生变化时调用 update_label 函数
|
843 |
+
text_file_input.change(update_label, inputs=text_file_input, outputs=text_file_input)
|
844 |
+
# 加入停顿按钮
|
845 |
+
with gr.Row():
|
846 |
+
break_button = gr.Button("+停顿", variant="secondary")
|
847 |
+
laugh_button = gr.Button("+笑声", variant="secondary")
|
848 |
+
refine_button = gr.Button("Refine Text(预处理 加入停顿词、笑声等)", variant="secondary")
|
849 |
+
|
850 |
+
with gr.Column():
|
851 |
+
gr.Markdown("### 配置参数")
|
852 |
+
with gr.Row():
|
853 |
+
with gr.Column():
|
854 |
+
gr.Markdown("音色选择")
|
855 |
+
num_seeds_input = gr.Number(label="生成音频的数量", value=1, precision=0, visible=False)
|
856 |
+
speaker_stat = gr.State(value="seed")
|
857 |
+
tab_seed = gr.Tab(label="种子")
|
858 |
+
with tab_seed:
|
859 |
+
with gr.Row():
|
860 |
+
seed_input = gr.Number(label="指定种子", info="种子决定音色 0则随机", value=None,
|
861 |
+
precision=0)
|
862 |
+
generate_audio_seed = gr.Button("\U0001F3B2")
|
863 |
+
tab_roleid = gr.Tab(label="内置音色")
|
864 |
+
with tab_roleid:
|
865 |
+
roleid_input = gr.Dropdown(label="内置音色",
|
866 |
+
choices=[("发姐", "1"),
|
867 |
+
("纯情男大学生", "2"),
|
868 |
+
("阳光开朗大男孩", "3"),
|
869 |
+
("知心小姐姐", "4"),
|
870 |
+
("电视台女主持", "5"),
|
871 |
+
("魅力大叔", "6"),
|
872 |
+
("优雅甜美", "7"),
|
873 |
+
("贴心男宝2", "21"),
|
874 |
+
("正式打工人", "8"),
|
875 |
+
("贴心男宝1", "9")],
|
876 |
+
value="1",
|
877 |
+
info="选择音色后会覆盖种子。感谢 @QuantumDriver 提供音色")
|
878 |
+
tab_pt = gr.Tab(label="上传.PT文件")
|
879 |
+
with tab_pt:
|
880 |
+
pt_input = gr.File(label="上传音色文件", file_types=[".pt"], height=100)
|
881 |
+
|
882 |
+
with gr.Row():
|
883 |
+
style_select = gr.Radio(label="预设参数", info="语速部分可自行更改",
|
884 |
+
choices=["小说朗读", "对话", "中英混合", "默认"], value="默认",
|
885 |
+
interactive=True, )
|
886 |
+
with gr.Row():
|
887 |
+
# refine
|
888 |
+
refine_text_input = gr.Checkbox(label="Refine",
|
889 |
+
info="打开后会自动根据下方参数添加笑声/停顿等。关闭后可自行添加 [uv_break] [laugh] 或者点击下方 Refin按钮先行转换",
|
890 |
+
value=True)
|
891 |
+
speed_input = gr.Slider(label="语速", minimum=1, maximum=10, value=DEFAULT_SPEED, step=1)
|
892 |
+
with gr.Row():
|
893 |
+
oral_input = gr.Slider(label="口语化", minimum=0, maximum=9, value=DEFAULT_ORAL, step=1)
|
894 |
+
laugh_input = gr.Slider(label="笑声", minimum=0, maximum=2, value=DEFAULT_LAUGH, step=1)
|
895 |
+
bk_input = gr.Slider(label="停顿", minimum=0, maximum=7, value=DEFAULT_BK, step=1)
|
896 |
+
# gr.Markdown("### 文本参数")
|
897 |
+
with gr.Row():
|
898 |
+
min_length_input = gr.Number(label="文本分段长度", info="大于这个数值进行分段",
|
899 |
+
value=DEFAULT_SEG_LENGTH, precision=0)
|
900 |
+
batch_size_input = gr.Number(label="批大小", info="越高越快 太高爆显存 4G推荐3 其他酌情",
|
901 |
+
value=DEFAULT_BATCH_SIZE, precision=0)
|
902 |
+
with gr.Accordion("其他参数", open=False):
|
903 |
+
with gr.Row():
|
904 |
+
# 温度 top_P top_K
|
905 |
+
temperature_input = gr.Slider(label="温度", minimum=0.01, maximum=1.0, step=0.01,
|
906 |
+
value=DEFAULT_TEMPERATURE)
|
907 |
+
top_P_input = gr.Slider(label="top_P", minimum=0.1, maximum=0.9, step=0.05, value=DEFAULT_TOP_P)
|
908 |
+
top_K_input = gr.Slider(label="top_K", minimum=1, maximum=20, step=1, value=DEFAULT_TOP_K)
|
909 |
+
# reset 按钮
|
910 |
+
reset_button = gr.Button("重置")
|
911 |
+
|
912 |
+
with gr.Row():
|
913 |
+
with gr.Column():
|
914 |
+
generate_button = gr.Button("生成音频", variant="primary")
|
915 |
+
with gr.Column():
|
916 |
+
generate_button_stream = gr.Button("流式生成音频(一边播放一边推理)", variant="primary")
|
917 |
+
stream_select = gr.Radio(label="流输出方式",
|
918 |
+
info="真流式为实验功能,播放效果:卡播卡播卡播(⏳🎵⏳🎵⏳🎵);伪流式为分段推理后输出,播放效果:卡卡卡播播播播(⏳⏳🎵🎵🎵🎵)。伪流式批次建议4以上减少卡顿",
|
919 |
+
choices=[("真", "real"), ("伪", "fake")], value="fake", interactive=True, )
|
920 |
|
921 |
+
with gr.Row():
|
922 |
+
output_audio = gr.Audio(label="生成的音频文件")
|
923 |
+
output_audio_stream = gr.Audio(label="流式音频", value=None,
|
924 |
+
streaming=True,
|
925 |
+
autoplay=True,
|
926 |
+
# disable auto play for Windows, due to https://developer.chrome.com/blog/autoplay#webaudio
|
927 |
+
interactive=False,
|
928 |
+
show_label=True)
|
929 |
|
930 |
+
generate_audio_seed.click(generate_seed,
|
931 |
+
inputs=[],
|
932 |
+
outputs=seed_input)
|
|
|
|
|
|
|
933 |
|
|
|
934 |
|
935 |
+
def do_tab_change(evt: gr.SelectData):
    """Translate a tab-selection event into a speaker-source key.

    Args:
        evt: Selection event delivered by Gradio; ``evt.value`` is looked up
            in the label table below (presumably it carries the label of the
            selected tab -- confirm against the tab definitions).

    Returns:
        ``"seed"``, ``"role"`` or ``"pt"``; any unrecognised value falls back
        to ``"seed"``.
    """
    # Debug trace of the raw event fields.
    print(evt.selected, evt.index, evt.value, evt.target)
    source_by_label = {
        "种子": "seed",
        "内置音色": "role",
        "上传.PT文件": "pt",
    }
    label = evt.value
    return source_by_label[label] if label in source_by_label else "seed"
|
943 |
|
|
|
|
|
944 |
|
945 |
+
# Each speaker-source tab reports its selection through do_tab_change, whose result is stored in speaker_stat.
for _speaker_tab in (tab_seed, tab_roleid, tab_pt):
    _speaker_tab.select(do_tab_change, outputs=speaker_stat)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
948 |
|
|
|
|
|
949 |
|
950 |
+
def do_style_select(x):
    """Return the four slider values associated with a speaking-style name.

    The list is ordered like the fallback at the bottom of the function:
    speed, oral, laugh, break.

    Args:
        x: Style name chosen in the UI.

    Returns:
        A new four-element list. Styles that are not in the preset table get
        the module-level DEFAULT_* values.
    """
    presets = (
        ("小说朗读", (4, 0, 0, 2)),
        ("对话", (5, 5, 1, 4)),
        ("中英混合", (4, 1, 0, 3)),
    )
    for style_name, values in presets:
        if x == style_name:
            # Hand back a fresh list on every call.
            return list(values)
    # The defaults are only read when no preset matched.
    return [DEFAULT_SPEED, DEFAULT_ORAL, DEFAULT_LAUGH, DEFAULT_BK]
|
959 |
|
|
|
|
|
|
|
|
|
960 |
|
961 |
+
# When the style selector changes, push the matching preset into the speed/oral/laugh/break sliders.
style_select.change(
    fn=do_style_select,
    inputs=style_select,
    outputs=[speed_input, oral_input, laugh_input, bk_input],
)
|
|
|
|
|
|
|
967 |
|
968 |
+
# Refine button: generate_refine receives the text plus the oral/laugh/break and sampling settings,
# and its result is written back into the same text box.
refine_button.click(
    fn=generate_refine,
    inputs=[
        text_file_input,
        oral_input, laugh_input, bk_input,
        temperature_input, top_P_input, top_K_input,
    ],
    outputs=text_file_input,
)
|
974 |
+
# Reset button: restore temperature / top_P / top_K to the same DEFAULT_* constants the sliders
# are created with, so "reset" always matches the UI's initial state rather than a second,
# separately hard-coded set of numbers that can drift from the configured defaults.
reset_button.click(
    lambda: [DEFAULT_TEMPERATURE, DEFAULT_TOP_P, DEFAULT_TOP_K],
    inputs=None,
    outputs=[temperature_input, top_P_input, top_K_input],
)
|
980 |
|
981 |
+
# One-shot generation: every control value is passed to generate_tts_audio in the order listed,
# and the result is shown in output_audio.
generate_button.click(
    generate_tts_audio,
    inputs=[
        text_file_input, num_seeds_input, seed_input,
        speed_input, oral_input, laugh_input, bk_input,
        min_length_input, batch_size_input,
        temperature_input, top_P_input, top_K_input,
        roleid_input, refine_text_input, speaker_stat, pt_input,
    ],
    outputs=[output_audio],
)
|
1003 |
+
|
1004 |
+
# Streaming generation: same inputs as the one-shot button plus stream_select as the final input;
# the result goes to output_audio_stream.
generate_button_stream.click(
    generate_tts_audio_stream,
    inputs=[
        text_file_input, num_seeds_input, seed_input,
        speed_input, oral_input, laugh_input, bk_input,
        min_length_input, batch_size_input,
        temperature_input, top_P_input, top_K_input,
        roleid_input, refine_text_input, speaker_stat, pt_input,
        stream_select,
    ],
    outputs=[output_audio_stream],
)
|
1027 |
+
|
1028 |
+
# The button is listed among its own inputs, so inser_token receives the button's value together
# with the current text (presumably the token to insert -- confirm in inser_token).
break_button.click(fn=inser_token, inputs=[text_file_input, break_button], outputs=text_file_input)
|
1033 |
+
|
1034 |
+
# The button is listed among its own inputs, so inser_token receives the button's value together
# with the current text (presumably the token to insert -- confirm in inser_token).
laugh_button.click(fn=inser_token, inputs=[text_file_input, laugh_button], outputs=text_file_input)
|
1039 |
|
1040 |
+
|
1041 |
+
|
1042 |
# Start the Gradio app. `share` is taken from `args` (presumably the parsed CLI options -- confirm),
# and inbrowser=True asks Gradio to open the UI in a browser tab on launch.
demo.launch(share=args.share, inbrowser=True)
|