Spaces:

balacoon
/

tts

Running

File size: 3,408 Bytes

53f8a32
 
 
 
 
 
5b5d4c0
 
53f8a32
 
 
 
 
9846d74
53f8a32
 
 
98923fe
9846d74
 
 
5339f1e
9846d74
 
 
53f8a32
 
 
 
 
 
 
 
 
 
 
80ee0e5
53f8a32
 
 
0f11bd1
 
53f8a32
 
fbe7d93
53f8a32
 
 
fbe7d93
babf22d
37e87fa
53f8a32
 
 
 
fbe7d93
53f8a32
 
 
 
 
 
 
 
ef55711
53f8a32
 
 
2d5fa2d
53f8a32
2d5fa2d
53f8a32
 
 
 
fbe7d93
53f8a32
fbe7d93
53f8a32
 
 
 
 
 
 
 
 
 
 
 
 
6ebe60a
 
53f8a32
e0cceab
53f8a32
 
 
4263bcd
53f8a32

"""
Copyright 2022 Balacoon

TTS interactive demo
"""

import os
import glob
import logging
from typing import cast

import gradio as gr
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

# Process-wide TTS engine. Stays None until a model is picked in the UI,
# at which point it is rebound to a loaded `TTS` instance.
tts = None
# Local directory the model repository is downloaded into.
model_repo_dir = "data"
# Fetch every file of the model repository, one at a time, before the UI
# is built: the list of available models is obtained by listing this directory.
for name in list_repo_files(repo_id="balacoon/tts"):
    hf_hub_download(repo_id="balacoon/tts", filename=name, local_dir=model_repo_dir)


def main():
    """
    Builds the interactive TTS demo and launches it.

    The page consists of a text box, a model dropdown (filled with the
    `*_cpu.addon` files found in `model_repo_dir`), a speaker dropdown
    (re-filled every time a model is selected), a "Generate" button and
    an audio widget that receives the synthesized waveform.
    Requests are queued and processed one at a time.
    """
    logging.basicConfig(level=logging.INFO)
    # longer utterances are truncated to this many characters before synthesis
    max_text_len = 1024

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            <h1 align="center">Balacoon🦝 Text-to-Speech</h1>

            1. Write an utterance to generate,
            2. Select the model to synthesize with
            3. Select speaker
            4. Hit "Generate" and listen to the result!

            You can learn more about models available
            [here](https://huggingface.co/balacoon/tts).
            Visit [Balacoon website](https://balacoon.com/) for more info.
            """
        )
        with gr.Row(variant="panel"):
            text = gr.Textbox(label="Text", placeholder="Type something here...")

        with gr.Row():
            with gr.Column(variant="panel"):
                # os.listdir returns entries in arbitrary order,
                # sort to keep the dropdown stable between restarts
                model_files = sorted(
                    x for x in os.listdir(model_repo_dir) if x.endswith("_cpu.addon")
                )
                model_name = gr.Dropdown(
                    label="Model",
                    choices=model_files,
                )
            with gr.Column(variant="panel"):
                speaker = gr.Dropdown(label="Speaker", choices=[])

            def set_model(model_name_str: str):
                """
                gets value from `model_name`, loads model,
                re-initializes the global tts object, gets list of
                speakers that model supports and sets them to `speaker`.
                If no model name is provided, nothing is loaded
                and the list of speakers is emptied.
                """
                global tts
                if not model_name_str:
                    # nothing selected: os.path.join would fail on None
                    logging.info("model is not selected")
                    return gr.Dropdown.update(choices=[], value=None)
                model_path = os.path.join(model_repo_dir, model_name_str)
                tts = TTS(model_path)
                speakers = tts.get_speakers()
                # guard against an empty list of speakers (presumably possible
                # for single-speaker models - TODO confirm), indexing it would raise
                value = speakers[-1] if speakers else None
                return gr.Dropdown.update(
                    choices=speakers, value=value, visible=True
                )

            model_name.change(set_model, inputs=model_name, outputs=speaker)

        with gr.Row(variant="panel"):
            generate = gr.Button("Generate")
        with gr.Row(variant="panel"):
            audio = gr.Audio()

        def synthesize_audio(text_str: str, speaker_str: str = ""):
            """
            gets utterance to synthesize from `text` Textbox
            and speaker name from `speaker` dropdown list.
            speaker name might be empty for single-speaker models.
            Synthesizes the waveform and updates `audio` with it.
            Returns None (no audio) if the text is empty
            or if no model was loaded yet.
            """
            if not text_str:
                logging.info("text is not provided")
                return None
            if tts is None:
                # "Generate" was hit before any model was selected
                logging.warning("model is not selected, nothing to synthesize with")
                return None
            # cast only informs type checkers: module-level `tts` starts as None
            engine = cast(TTS, tts)
            # slicing is a no-op for texts that are already short enough
            text_str = text_str[:max_text_len]
            # an empty speaker dropdown may deliver None, pass the documented empty name
            samples = engine.synthesize(text_str, speaker_str or "")
            return gr.Audio.update(value=(engine.get_sampling_rate(), samples))

        generate.click(synthesize_audio, inputs=[text, speaker], outputs=audio)

    demo.queue(concurrency_count=1).launch()


# Build and launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()