# Spaces:
# Runtime error
# Runtime error
import json
import os
import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

from engine import Piper
# Longest input, in characters, that tts() synthesizes; longer text is cut.
MAX_TXT_LEN = 100

# Static speaker aliases. This name is rebound further down the file to the
# speakers reported by the loaded BSC model, so this list only holds until then.
SPEAKERS = [
    'f_cen_05',
    'f_cen_81',
    'f_occ_31',
    'f_occ_de',
    'f_sep_31',
    'm_cen_08',
    'm_occ_44',
    'm_val_89',
]
def carrega_bsc():
    """Build the Synthesizer for the BSC model.

    The checkpoint, config and speakers files are looked up under
    ``models/bsc`` relative to the current working directory. No vocoder
    checkpoint or vocoder config is supplied.

    Returns:
        The constructed ``Synthesizer`` instance.
    """
    model_dir = os.path.join(os.getcwd(), "models", "bsc")
    model_path = os.path.join(model_dir, "best_model.pth")
    config_path = os.path.join(model_dir, "config.json")
    speakers_file_path = os.path.join(model_dir, "speakers.pth")
    vocoder_path = None
    vocoder_config_path = None

    # Arguments stay positional and in their original order; the fourth slot
    # is deliberately None (presumably the languages file - confirm against
    # the installed TTS version).
    synthesizer = Synthesizer(
        model_path,
        config_path,
        speakers_file_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    return synthesizer
def carrega_collectivat():
    """Build the Synthesizer for the Col·lectivaT model with its vocoder.

    All four files (model checkpoint, model config, vocoder checkpoint and
    vocoder config) are looked up under ``models/collectivat`` relative to
    the current working directory. No speakers file is supplied.

    Returns:
        The constructed ``Synthesizer`` instance.
    """
    model_dir = os.path.join(os.getcwd(), "models", "collectivat")
    model_path = os.path.join(model_dir, "fast-speech_best_model.pth")
    config_path = os.path.join(model_dir, "fast-speech_config.json")
    vocoder_path = os.path.join(model_dir, "ljspeech--hifigan_v2_model_file.pth")
    vocoder_config_path = os.path.join(model_dir, "ljspeech--hifigan_v2_config.json")

    # Arguments stay positional and in their original order; the third and
    # fourth slots are None (no speakers file is passed for this model).
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    return synthesizer
# Models are loaded once, at import time, and kept as module globals.
model_bsc = carrega_bsc()

# Rebind SPEAKERS to whatever the loaded model reports.
# NOTE(review): confirm the installed Synthesizer actually exposes a
# `speakers` attribute; if it does not, this line raises AttributeError at
# startup.
SPEAKERS = model_bsc.speakers

# NOTE(review): model_collectivat is not referenced anywhere else in the
# visible code; it is still loaded here to keep startup behavior unchanged.
model_collectivat = carrega_collectivat()
def tts(text: str, speaker_idx) -> str:
    """Synthesize *text* with the BSC model and return the path to a WAV file.

    Text longer than ``MAX_TXT_LEN`` characters is truncated to that length
    and a notice is printed. The text that is actually synthesized is always
    printed.

    Args:
        text: The text to synthesize.
        speaker_idx: Speaker selector, forwarded unchanged to
            ``model_bsc.tts``.

    Returns:
        The filesystem path of a newly created temporary ``.wav`` file.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # The speaker id is used as given: no alias-to-id mapping is applied.
    wavs = model_bsc.tts(text, speaker_idx)

    # delete=False keeps the file on disk after the handle is closed, since
    # the caller receives only the path and reads the audio afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        model_bsc.save_wav(wavs, fp)
    # Return after the `with` so the handle is closed before the path is used.
    return fp.name
# User-facing usage steps (Catalan); passed to gr.Interface as `description`.
description = """
1️⃣ Introdueix el text a sintetitzar.
2️⃣ Selecciona una veu en el desplegable.
3️⃣ Gaudeix!
"""

# Passed to gr.Interface as `article`; intentionally empty.
article = ""
# Web UI: a text box and a speaker dropdown feed tts(); the returned file path
# is rendered as an audio player.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces and the `layout`
# argument belong to the legacy Gradio API; confirm the pinned gradio version
# still provides them, as this is a plausible source of a startup error.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Text",
            default="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
        ),
        gr.inputs.Dropdown(
            label="Selecciona un parlant",
            choices=SPEAKERS,
            default=None,
        ),
    ],
    outputs=gr.outputs.Audio(label="Output", type="filepath"),
    title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
    description=description,
    article=article,
    allow_flagging="never",
    layout="vertical",
    live=False,
)

# Start the server when the module is executed; no public share link.
iface.launch(share=False)