vietTTS / app.py
ntt123's picture
Create app.py
cd75eda
raw
history blame
1.08 kB
from pathlib import Path
from vietTTS.hifigan.mel2wave import mel2wave
from vietTTS.nat.text2mel import text2mel
from vietTTS import nat_normalize_text
import numpy as np
import os
# Model files fetched at startup with the ``gdown`` command-line tool, as
# (file id, local file name) pairs. The local names are the ones that
# text_to_speech() opens later.
_CHECKPOINTS = (
    ("16UhN8QBxG1YYwUh8smdEeVnKo9qZhvZj", "duration_latest_ckpt.pickle"),
    ("1-8Ig65S3irNHSzcskT37SLgeyuUhjKdj", "acoustic_latest_ckpt.pickle"),
    ("19cRNDC6IrHFAAE4U9I7K0mzLMgPsi5zb", "hk_hifi.pickle"),
)
for _file_id, _file_name in _CHECKPOINTS:
    # Same command line as before; os.system() reports failure as a non-zero
    # status, which used to be ignored.
    _status = os.system(f"gdown --id {_file_id} -O {_file_name}")
    # Fail fast only when the file is really unavailable: a failed refresh is
    # tolerated if a copy from an earlier run is still on disk.
    if _status != 0 and not os.path.exists(_file_name):
        raise RuntimeError(
            f"Could not download {_file_name} (gdown exit status {_status})"
        )
def text_to_speech(text):
    """Synthesize ``text`` and return the waveform as 16-bit PCM samples.

    The text is normalized with ``nat_normalize_text``, turned into a mel
    spectrogram by ``text2mel`` and then into a waveform by ``mel2wave``.

    Args:
        text: The text to synthesize.

    Returns:
        A NumPy ``int16`` array holding the synthesized samples.
    """
    text = nat_normalize_text(text)
    mel = text2mel(
        text,
        "lexicon.txt",
        # NOTE(review): presumably the silence duration in seconds; confirm
        # against text2mel's signature.
        0.2,
        Path("acoustic_latest_ckpt.pickle"),
        Path("duration_latest_ckpt.pickle"),
    )
    wave = mel2wave(mel, Path("config.json"), Path("hk_hifi.pickle"))
    # Assumes mel2wave returns floating-point samples nominally in [-1, 1];
    # TODO confirm. Scaling by 2**15 maps that onto the int16 range, but a
    # sample at or above +1.0 becomes 32768, which does not fit in int16 and
    # would wrap around to a negative value. Clip before casting so such
    # peaks saturate instead; in-range samples are unaffected.
    scaled = np.clip(wave * (2**15), -(2**15), 2**15 - 1)
    return scaled.astype(np.int16)
import gradio as gr
def speak(text):
    """Gradio callback: synthesize ``text`` and return it for audio playback.

    Args:
        text: The text entered in the demo's input box.

    Returns:
        A ``(16_000, samples)`` tuple, where ``samples`` is whatever
        ``text_to_speech`` returns. The first element is the value handed to
        the audio output as the sample rate.
    """
    sample_rate = 16_000
    samples = text_to_speech(text)
    return (sample_rate, samples)
# Heading and blurb passed to the demo page.
title = "vietTTS"
description = "A vietnamese text-to-speech demo."

# One text input wired to speak(), with its result sent to an audio output.
iface = gr.Interface(
    fn=speak,
    inputs="text",
    outputs="audio",
    title=title,
    description=description,
)

# Start serving the demo.
iface.launch()