File size: 1,319 Bytes
b65385c
 
 
 
4275815
12e4911
b65385c
 
 
 
 
 
 
 
 
 
 
12e4911
b65385c
 
 
 
 
 
 
 
 
 
 
 
 
 
32a15fc
b65385c
 
 
 
 
 
f1c1a96
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import tempfile
from datetime import datetime
from pathlib import Path

import gradio as gr
import soundfile as sf
import torch
from transformers import VitsModel, AutoTokenizer


# Checkpoint identifier shared by the model weights and the tokenizer.
_CHECKPOINT = "facebook/mms-tts-pol"

# Both objects are created once at import time and reused by `tts`.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
tts_model = VitsModel.from_pretrained(_CHECKPOINT)

# Heading text rendered at the top of the page.
title = "Polish Text-To-Speech model based on facebook/mms-tts-pol"

def tts(text: str, language: str = "pol") -> str:
    """Synthesize *text* to speech and return the path of the WAV file.

    Args:
        text: The text to read aloud.
        language: Currently ignored; synthesis always uses the module-level
            ``tts_model``. It has a default so that callers which supply
            only the text (such as the click handler in this file) match
            the signature.

    Returns:
        Path of a newly created WAV file containing the generated audio.
    """
    print(f"When: {datetime.today().strftime('%Y-%m-%d %H:%M:%S')} :", text)
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform.squeeze().numpy()

    # Write each result to its own file. A single shared path would let
    # overlapping requests overwrite one another's audio. The handle is
    # closed before writing so the path can be reopened on every platform.
    # NOTE: the file is intentionally not deleted here because the caller
    # still has to read it; cleanup is left to the temp directory's owner.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, waveform, tts_model.config.sampling_rate)
    return wav_path



# Preload the sample clip only when it is actually on disk; handing the
# audio component a path that does not exist would break a fresh start.
_initial_audio = 'temp_file.wav' if Path('temp_file.wav').is_file() else None

with gr.Blocks() as blocks:
    # Centered page heading.
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    # Two-part layout: input column (text box + button) next to the player.
    with gr.Row():
        with gr.Column():
            textbox = gr.Textbox(label="Input", value="Cześć, co chciałbyś abym Ci przeczytał?")
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
        audio = gr.Audio(_initial_audio, label="Generated Audio (wav)", type='filepath', autoplay=False)
    # Clicking the button runs `tts` on the text box content and sends the
    # returned file path to the audio player.
    submit.click(tts, textbox, audio)


blocks.launch()