File size: 3,207 Bytes
91e1c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


# NOTE(review): MODELS and SPEAKERS are never read or written in the visible
# code; they are kept so that anything importing them keeps working.
MODELS = {}
SPEAKERS = {}
# Upper bound on the number of input characters `tts` will synthesize;
# longer inputs are truncated to this length.
MAX_TXT_LEN = 100


manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# Drop every model whose name contains one of these substrings (per the
# original note: multi-speaker models and slow wavegrad vocoders).
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

# Reorder models: English ("/en/") models first, everything else after.
EN = [el for el in MODEL_NAMES if "/en/" in el]
OTHER = [el for el in MODEL_NAMES if "/en/" not in el]
# Promote the sixth English model to the top of the list. The swap is skipped
# when fewer than six English models are available, which previously raised
# IndexError at import time.
if len(EN) > 5:
    EN[0], EN[5] = EN[5], EN[0]
MODEL_NAMES = EN + OTHER

# Log the final ordering shown in the UI dropdown.
print(MODEL_NAMES)


def tts(text: str, model_name: str) -> str:
    """Synthesize *text* with the model *model_name* and return a WAV file path.

    The input is truncated to ``MAX_TXT_LEN`` characters. The model (and its
    default vocoder, when the model entry declares one) is fetched through the
    module-level ``manager`` and a new ``Synthesizer`` is built on every call.

    Args:
        text: Text to synthesize.
        model_name: Model identifier as listed in ``MODEL_NAMES``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False`` so it outlives this function;
        nothing in this function removes it afterwards.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # download model
    model_path, config_path, model_item = manager.download_model(model_name)

    # A model entry may lack a "default_vocoder" key (or carry None for it);
    # look it up tolerantly instead of raising KeyError/TypeError.
    vocoder_name: Optional[str] = (model_item or {}).get("default_vocoder")

    # download vocoder (only when the model declares one)
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer
    # NOTE(review): the two positional None arguments presumably disable
    # speaker/language files for single-speaker models; confirm against the
    # installed Synthesizer signature.
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )

    # synthesize
    wavs = synthesizer.tts(text, None)

    # Write the audio to a persistent temp file and hand its path to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


# NOTE(review): `title` is defined but never rendered in the layout below;
# kept unchanged in case other code refers to it.
title = """<h1 align="center">🐸💬 CoquiTTS Playground </h1>"""

# Model preselected in the dropdown. Fall back to the first listed model when
# the preferred one is not among the available choices, so the preselected
# value is always something the user could actually pick.
_DEFAULT_MODEL = "tts_models/en/jenny/jenny"
if MODEL_NAMES and _DEFAULT_MODEL not in MODEL_NAMES:
    _DEFAULT_MODEL = MODEL_NAMES[0]

with gr.Blocks(analytics_enabled=False) as demo:
    # Header row: banner image followed by a short description of the space.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                ## <img src="https://huggingface.co/spaces/proxectonos/README/resolve/main/title-card.png" width="100%" style="border-radius: 0.75rem;">
                """
            )
            gr.Markdown(
            """
            <br/>
            💻 Este space mostra algúns dos modelos TTS desenvolvidos polo **[Proxecto Nós](https://huggingface.co/proxectonos)**.
            <br/>
            """
            )

    # Main row: inputs on the left, synthesized audio on the right.
    with gr.Row():
        with gr.Column():
            # Top-level components replace the deprecated gr.inputs.* /
            # gr.outputs.* wrappers; `value=` is the replacement for `default=`.
            input_text = gr.Textbox(
                label="Input Text",
                value="This sentence has been generated by a speech synthesis system.",
            )
            model_select = gr.Dropdown(
                label="Pick Model: tts_models/<language>/<dataset>/<model_name>",
                choices=MODEL_NAMES,
                value=_DEFAULT_MODEL,
            )
            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

        with gr.Column():
            output_audio = gr.Audio(label="Output", type="filepath")

    # Clicking "Send" runs `tts(text, model_name)` and plays the returned file.
    tts_button.click(
        tts,
        inputs=[
            input_text,
            model_select,
        ],
        outputs=[output_audio],
    )

# NOTE(review): `concurrency_count` is the Gradio 3.x queue parameter; newer
# major versions use a different argument, so revisit this on upgrade.
demo.queue(concurrency_count=16).launch(debug=True)