"""Gradio front end that turns typed text into speech and plays it back.

The page has one textbox, one button and one audio player. Submitting the
textbox or clicking the button calls ``speak`` and feeds the result to the
audio player.
"""
import gradio as gr
import numpy as np
import torch
import os
import re_matching
from tools.sentence import split_by_language, sentence_split
import utils
from infer import infer, latest_version, get_net_g
import gradio as gr
import webbrowser
from config import config
from tools.translate import translate
from webui import reload_javascript

device = config.webui_config.device
if device == "mps":
    # Set before any inference runs when the configured device is "mps".
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"


def generate_audio(
    slices,
    sdp_ratio,
    noise_scale,
    noise_scale_w,
    length_scale,
    speaker,
    language,
):
    """Synthesize every text piece and return the resulting audio chunks.

    Each piece in ``slices`` is passed to ``infer`` and converted to 16-bit
    samples; a block of zeros lasting half a second (``sampling_rate // 2``
    samples) is placed after every piece.

    NOTE: reads the module globals ``hps`` and ``net_g``, which this file
    only assigns inside the ``__main__`` guard.

    Args:
        slices: iterable of text pieces, synthesized in order.
        sdp_ratio, noise_scale, noise_scale_w, length_scale: forwarded
            unchanged to ``infer``.
        speaker: forwarded to ``infer`` as ``sid``.
        language: forwarded to ``infer``.

    Returns:
        A list alternating between a converted audio chunk and the shared
        int16 silence array.
    """
    pause = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
    chunks = []
    with torch.no_grad():
        for fragment in slices:
            waveform = infer(
                fragment,
                sdp_ratio=sdp_ratio,
                noise_scale=noise_scale,
                noise_scale_w=noise_scale_w,
                length_scale=length_scale,
                sid=speaker,
                language=language,
                hps=hps,
                net_g=net_g,
                device=device,
            )
            pcm = gr.processing_utils.convert_to_16_bit_wav(waveform)
            # Follow each synthesized piece with the silence gap.
            chunks.extend((pcm, pause))
    return chunks


def speak(
    text: str,
    speaker="TalkFlower_CNzh",
    sdp_ratio=0.2,  # SDP/DP mix ratio
    noise_scale=0.6,  # emotion
    noise_scale_w=0.6,  # phoneme length
    length_scale=0.9,  # speaking rate
    language="ZH"
):
    """Synthesize ``text`` and return it in the form ``gr.Audio`` is given.

    The text is echoed to stdout, split on ``"|"`` and every part is
    synthesized by ``generate_audio``; the chunks are then joined into a
    single array.

    Returns:
        ``(sampling_rate, samples)`` where ``sampling_rate`` comes from
        ``hps.data.sampling_rate``.
    """
    print(text)
    pieces = text.split("|")
    rendered = generate_audio(
        slices=pieces,
        sdp_ratio=sdp_ratio,
        noise_scale=noise_scale,
        noise_scale_w=noise_scale_w,
        length_scale=length_scale,
        speaker=speaker,
        language=language,
    )
    audio_concat = np.concatenate(rendered)
    return (hps.data.sampling_rate, audio_concat)


# Stylesheet handed to gr.Blocks below.
with open("./css/style.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

with gr.Blocks(css=customCSS) as demo:
    # Disabled placeholder component kept from the original source:
    # talkingFlowerModel = gr.HTML("""
    # 123
    # """)
    talkingFlowerPic = gr.HTML(
        "TalkingFlowerPic",
        elem_id="talking_flower_pic",
    )
    input_text = gr.Textbox(
        lines=1,
        label="Talking Flower will say:",
        elem_classes="wonder-card",
        elem_id="input_text",
    )
    speak_button = gr.Button(
        "Speak!",
        elem_id="comfirm_button",
        elem_classes="button wonder-card",
    )
    audio_output = gr.Audio(
        label="输出音频",
        show_label=False,
        autoplay=True,
        elem_id="audio_output",
        elem_classes="wonder-card",
    )

    # Pressing Enter in the textbox and clicking the button trigger the
    # same handler with the same wiring.
    for register in (input_text.submit, speak_button.click):
        register(speak, inputs=[input_text], outputs=[audio_output])

if __name__ == "__main__":
    hps = utils.get_hparams_from_file(config.webui_config.config_path)
    # Use the version recorded in the hyper-parameters when there is one.
    version = getattr(hps, "version", latest_version)
    net_g = get_net_g(
        model_path=config.webui_config.model,
        version=version,
        device=device,
        hps=hps,
    )
    reload_javascript()
    demo.queue().launch(
        allowed_paths=["./assets"],
        show_api=False,
        # server_name=server_name,
        # server_port=server_port,
        share=True,
        inbrowser=True,  # do not enable inbrowser when running under Docker
    )