# Hugging Face Space (status: Running)
import os | |
from TTS.utils.download import download_url | |
from TTS.utils.synthesizer import Synthesizer | |
import gradio as gr | |
import tempfile | |
import torch | |
import json | |
from TTS.tts.utils.synthesis import synthesis | |
from TTS.tts.configs.vits_config import VitsConfig | |
from TTS.tts.models.vits import Vits, VitsCharacters | |
from TTS.tts.utils.text.tokenizer import TTSTokenizer | |
import numpy as np | |
from TTS.utils.audio.numpy_transforms import save_wav | |
# Upper bound on the number of input characters; tts() cuts longer text to this length.
MAX_TXT_LEN = 800

# Local directory name template; "{}" is filled with "male" or "female".
BASE_DIR = "kbd-vits-tts-{}"

# Root of each voice's model repository on the Hugging Face Hub.
_MALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main"
_FEMALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main"

# PyTorch checkpoints and their configs.
# NOTE(review): the male config is tagged 35000 while the checkpoint is 56000 - confirm intended.
MALE_MODEL_URL = f"{_MALE_REPO}/checkpoint_56000.pth"
MALE_CONFIG_URL = f"{_MALE_REPO}/config_35000.json"
FEMALE_MODEL_URL = f"{_FEMALE_REPO}/best_model_56351.pth"
FEMALE_CONFIG_URL = f"{_FEMALE_REPO}/config.json"

# ONNX exports of the same voices.
MALE_ONNX_MODEL_URL = f"{_MALE_REPO}/onnx/kbd_vits_male.onnx"
FEMALE_ONNX_MODEL_URL = f"{_FEMALE_REPO}/onnx/kbd_vits_female.onnx"
def download_model_and_config(gender):
    """Fetch the checkpoint, config and ONNX export for one voice.

    Args:
        gender: ``"male"`` selects the male URLs; any other value selects the
            female URLs. The value is also substituted into ``BASE_DIR`` to
            form the target directory name.

    Returns:
        The directory the three files (``model.pth``, ``config.json``,
        ``model.onnx``) are stored in.
    """
    dir_path = BASE_DIR.format(gender)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(dir_path, exist_ok=True)

    is_male = gender == "male"
    targets = (
        (MALE_MODEL_URL if is_male else FEMALE_MODEL_URL, "model.pth"),
        (MALE_CONFIG_URL if is_male else FEMALE_CONFIG_URL, "config.json"),
        (MALE_ONNX_MODEL_URL if is_male else FEMALE_ONNX_MODEL_URL, "model.onnx"),
    )
    for url, filename in targets:
        # Decide per file rather than per directory: a directory left behind by
        # an interrupted run gets its missing files, and files that are already
        # present are not fetched again.
        if not os.path.exists(os.path.join(dir_path, filename)):
            download_url(url, dir_path, filename)
    return dir_path
# Make sure both voices are available locally before the UI is built.
for _gender in ("male", "female"):
    download_model_and_config(_gender)
def _reserve_wav_path():
    """Create an empty temporary ``.wav`` file and return its path."""
    # delete=False keeps the file after the handle closes; returning from inside
    # the ``with`` closes the handle so the caller can reopen the path for writing.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        return temp_file.name


def tts(text: str, voice: str = "Male", use_onnx: bool = True):
    """Synthesize speech for ``text`` and return the path of the resulting WAV file.

    Args:
        text: Text to speak. Characters beyond ``MAX_TXT_LEN`` are dropped.
        voice: ``"Male"`` selects the male model directory; any other value
            selects the female one.
        use_onnx: When true, run the exported ONNX model; otherwise run the
            PyTorch checkpoint through ``Synthesizer``.

    Returns:
        Path to a temporary ``.wav`` file. It is created with ``delete=False``
        and is not removed by this function.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    model_dir = BASE_DIR.format("male" if voice == "Male" else "female")
    config_file = f"{model_dir}/config.json"

    # Map Latin capital "I" to the Cyrillic palochka (U+04C0). This has to run
    # before lower(), which would otherwise turn the "I" into a Latin "i".
    text = text.replace("I", "\u04c0")
    text = text.lower()

    if use_onnx:
        config = VitsConfig()
        config.load_json(config_file)
        # The model builds its own tokenizer from the config; that is the one
        # used below, so no separate TTSTokenizer is constructed here.
        vits = Vits.init_from_config(config)
        vits.load_onnx(f"{model_dir}/model.onnx")

        token_ids = vits.tokenizer.text_to_ids(text)
        # [None, :] adds a leading axis in front of the id sequence.
        text_inputs = np.asarray(token_ids, dtype=np.int64)[None, :]
        audio = vits.inference_onnx(text_inputs)

        out_path = _reserve_wav_path()
        # NOTE(review): sample rate is hard-coded; presumably it matches the
        # model config - confirm.
        save_wav(wav=audio[0], path=out_path, sample_rate=24000)
    else:
        synthesizer = Synthesizer(f"{model_dir}/model.pth", config_file)
        wavs = synthesizer.tts(text)

        out_path = _reserve_wav_path()
        synthesizer.save_wav(wavs, out_path)

    return out_path
# Web UI: text box, voice selector and ONNX toggle in; synthesized audio file out.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.Textbox(
            label="Text",
            # Default sample text, restored from a mis-decoded (mojibake) literal.
            value="Дауэ ущыт?",
        ),
        gr.Radio(
            choices=["Male", "Female"],
            value="Male",
            label="Voice",
        ),
        gr.Checkbox(
            label="Use ONNX",
            value=True,
        ),
    ],
    # tts() returns a path on disk, hence type="filepath".
    outputs=gr.Audio(label="Output", type="filepath"),
    title="KBD TTS",
    live=False,
)

iface.launch(share=False)