File size: 1,478 Bytes
96094ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d16d90
96094ed
 
 
 
 
 
 
 
 
 
 
 
 
4a0b304
96094ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82471a3
96094ed
5c9ecd1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import torch

from transformers import pipeline

import numpy as np
import gradio as gr

def _grab_best_device(use_gpu=True):
    """Pick the torch device string to run inference on.

    Args:
        use_gpu: When truthy, CUDA is selected if torch reports at least one
            CUDA device.

    Returns:
        ``"cuda"`` when a CUDA device is reported and ``use_gpu`` is truthy,
        otherwise ``"cpu"``.
    """
    has_cuda = torch.cuda.device_count() > 0
    return "cuda" if has_cuda and use_gpu else "cpu"

# Resolve the inference device once at import time ("cuda" or "cpu").
device = _grab_best_device()

# Hub checkpoint served by this demo.
HUB_PATH = "ylacombe/vits_vctk_welsh_male"
# Pass the detected device instead of a hard-coded GPU index (previously
# ``device=0``) so the app also starts on CPU-only hosts.
pipe = pipeline("text-to-speech", model=HUB_PATH, device=device)

# Markdown heading rendered at the top of the page.
title = "# 🐶 VITS"

# Markdown body rendered under the heading (currently blank).
description = """

"""

# Number of speakers declared in the loaded model's config; used to populate
# the speaker dropdown in the UI.
num_speakers = pipe.model.config.num_speakers

# Inference
def generate_audio(text, spkr_id):
    """Synthesise speech for ``text`` with the module-level ``pipe``.

    Args:
        text: Text to synthesise.
        spkr_id: Speaker identifier, forwarded to the model as the
            ``"spkr_id"`` forward parameter.

    Returns:
        A ``(sampling_rate, audio)`` tuple, where ``audio`` is the pipeline's
        ``"audio"`` output after ``.squeeze()``.
    """
    # NOTE(review): upstream VitsModel.forward may name this argument
    # ``speaker_id`` -- confirm the installed transformers build accepts
    # ``spkr_id`` before renaming.
    result = pipe(text, forward_params={"spkr_id": spkr_id})

    sampling_rate = result["sampling_rate"]
    waveform = result["audio"].squeeze()
    return sampling_rate, waveform


# Gradio blocks demo
with gr.Blocks() as demo_blocks:
    # Page header: title and (currently blank) description.
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        # Left column: text prompt, speaker choice and the trigger button.
        with gr.Column():
            # The hint names VITS (the model actually served here); it
            # previously said "bark", left over from a different demo.
            inp_text = gr.Textbox(
                label="Input Text",
                info="What would you like VITS to synthesise?",
            )
            spkr = gr.Dropdown(
                list(range(num_speakers)),
                value=None,
                label="Speaker ID",
                info="Default: Unconditional Generation",
            )
            btn = gr.Button("Generate Audio!")

        # Right column: player for the synthesised waveform.
        with gr.Column():
            out_audio_vocos = gr.Audio(
                type="numpy",
                autoplay=False,
                label="Generated Audio",
                show_label=True,
            )

    # Clicking the button calls generate_audio(text, speaker) and routes its
    # (sampling_rate, audio) result to the audio player.
    btn.click(generate_audio, [inp_text, spkr], [out_audio_vocos])

demo_blocks.launch()