ccoreilly's picture
load collectivat
b8920a0
raw
history blame
2.76 kB
from engine import Piper
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
import os
import json
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Maximum number of input characters synthesized per request; longer text is
# truncated to this length inside tts().
MAX_TXT_LEN = 100

# Hard-coded speaker identifiers. This list is reassigned further down in the
# file from the loaded model's `speakers` attribute.
SPEAKERS = [
    'f_cen_05',
    'f_cen_81',
    'f_occ_31',
    'f_occ_de',
    'f_sep_31',
    'm_cen_08',
    'm_occ_44',
    'm_val_89',
]
def carrega_bsc():
    """Build the Synthesizer for the model files stored under ``models/bsc``.

    All paths are resolved against the current working directory. No vocoder
    checkpoint or vocoder config is passed for this model.

    Returns:
        The constructed ``Synthesizer`` instance.
    """
    cwd = os.getcwd()
    checkpoint = cwd + "/models/bsc/best_model.pth"
    config = cwd + "/models/bsc/config.json"
    speakers_file = cwd + "/models/bsc/speakers.pth"
    # Positional order: checkpoint, config, speakers file, then three values
    # left as None (the last two are the vocoder checkpoint and its config).
    # NOTE(review): the fourth positional argument is presumably the languages
    # file -- confirm against the installed TTS version.
    return Synthesizer(checkpoint, config, speakers_file, None, None, None)
def carrega_collectivat():
    """Build the Synthesizer for the model files under ``models/collectivat``.

    All paths are resolved against the current working directory. Unlike the
    BSC loader, this one passes a separate vocoder checkpoint and vocoder
    config, and no speakers file.

    Returns:
        The constructed ``Synthesizer`` instance.
    """
    cwd = os.getcwd()
    checkpoint = cwd + "/models/collectivat/fast-speech_best_model.pth"
    config = cwd + "/models/collectivat/fast-speech_config.json"
    vocoder_checkpoint = cwd + "/models/collectivat/ljspeech--hifigan_v2_model_file.pth"
    vocoder_config = cwd + "/models/collectivat/ljspeech--hifigan_v2_config.json"
    # Third and fourth positional arguments (speakers file and the one after
    # it) are intentionally None for this model.
    return Synthesizer(
        checkpoint, config, None, None, vocoder_checkpoint, vocoder_config
    )
# Load the BSC synthesizer once at import time; tts() uses this instance for
# both synthesis and WAV writing.
model_bsc = carrega_bsc()
# Replace the hard-coded SPEAKERS list with the speakers exposed by the loaded
# model, so the dropdown built later offers exactly what the model reports.
SPEAKERS = model_bsc.speakers
# Load the Collectivat synthesizer as well.
# NOTE(review): this object is not referenced anywhere else in the visible
# part of the file -- confirm it is still needed before paying its load cost.
model_collectivat = carrega_collectivat()
def tts(text, speaker_idx):
    """Synthesize ``text`` with the BSC model and return the path of a WAV file.

    Input longer than ``MAX_TXT_LEN`` characters is truncated (a notice is
    printed), and the text that is actually synthesized is echoed to stdout.

    The speaker value is forwarded to the model unchanged. Earlier revisions
    loaded ``speaker_map.json`` on every call, but the lookup that used it was
    commented out, so the file was parsed for nothing and a missing file made
    every request fail. That dead read has been removed.

    Args:
        text: The text to synthesize.
        speaker_idx: Speaker selected in the UI, passed straight to
            ``model_bsc.tts``.

    Returns:
        Filesystem path of the generated ``.wav`` file. The file is created
        with ``delete=False``, so it persists after this function returns and
        the caller is responsible for any cleanup.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # Synthesize with the selected speaker.
    wavs = model_bsc.tts(text, speaker_idx)

    # Write to a named temporary file that outlives this call; the interface
    # serves the audio from the returned path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        model_bsc.save_wav(wavs, fp)
    # The handle is closed (and flushed) before its name is handed back.
    return fp.name
# User-facing usage instructions (Catalan), passed as `description=` to the
# gr.Interface constructed below. This text is displayed at runtime, so it must
# not be edited for style.
description="""
1️⃣ Introdueix el text a sintetitzar.
2️⃣ Selecciona una veu en el desplegable.
3️⃣ Gaudeix!
"""
# Passed as `article=` to the gr.Interface constructed below; currently empty.
article= ""
# Input widgets, listed in the same order as the parameters of tts().
text_input = gr.inputs.Textbox(
    label="Text",
    default="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
)
speaker_input = gr.inputs.Dropdown(
    label="Selecciona un parlant",
    choices=SPEAKERS,
    default=None,
)

# tts() returns a file path, hence type="filepath" on the audio output.
audio_output = gr.outputs.Audio(label="Output", type="filepath")

# Assemble the interface around tts() with flagging disabled and no live mode.
iface = gr.Interface(
    fn=tts,
    inputs=[text_input, speaker_input],
    outputs=audio_output,
    title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
    description=description,
    article=article,
    allow_flagging="never",
    layout="vertical",
    live=False,
)

# Start the app without requesting a public share link.
iface.launch(share=False)