Spaces:
Runtime error
Runtime error
File size: 2,759 Bytes
7ae4987 7f0cc16 7514dcc 41b7aed 7514dcc b8920a0 7514dcc b8920a0 7514dcc 7f0cc16 7514dcc 7f0cc16 7514dcc 7f0cc16 7514dcc 7f0cc16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
from engine import Piper
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
import os
import json
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Upper bound, in characters, on the text accepted for a single synthesis.
MAX_TXT_LEN = 100

# Static list of speaker ids. Note that SPEAKERS is assigned again further
# down in this file, after the BSC model has been loaded.
SPEAKERS = [
    'f_cen_05',
    'f_cen_81',
    'f_occ_31',
    'f_occ_de',
    'f_sep_31',
    'm_cen_08',
    'm_occ_44',
    'm_val_89',
]
def carrega_bsc():
    """Build the ``Synthesizer`` for the model stored under ``models/bsc``.

    All paths are formed by appending to the current working directory.
    Both vocoder arguments are passed as ``None``.

    Returns:
        The newly constructed ``Synthesizer`` instance.
    """
    bsc_dir = os.getcwd() + "/models/bsc"
    return Synthesizer(
        bsc_dir + "/best_model.pth",   # model checkpoint
        bsc_dir + "/config.json",      # model config
        bsc_dir + "/speakers.pth",     # speakers file
        None,                          # fourth positional argument left unset
        None,                          # vocoder checkpoint: none
        None,                          # vocoder config: none
    )
def carrega_collectivat():
    """Build the ``Synthesizer`` for the model stored under ``models/collectivat``.

    All paths are formed by appending to the current working directory.
    Unlike the BSC loader, a separate vocoder checkpoint and config are
    supplied, and no speakers file is passed.

    Returns:
        The newly constructed ``Synthesizer`` instance.
    """
    cwd = os.getcwd()
    tts_checkpoint = f"{cwd}/models/collectivat/fast-speech_best_model.pth"
    tts_config = f"{cwd}/models/collectivat/fast-speech_config.json"
    vocoder_checkpoint = f"{cwd}/models/collectivat/ljspeech--hifigan_v2_model_file.pth"
    vocoder_config = f"{cwd}/models/collectivat/ljspeech--hifigan_v2_config.json"

    # Third and fourth positional arguments are deliberately None, as in the
    # original call.
    return Synthesizer(
        tts_checkpoint,
        tts_config,
        None,
        None,
        vocoder_checkpoint,
        vocoder_config,
    )
# Instantiate both synthesizers once, at module load.
model_bsc = carrega_bsc()

# Prefer the speaker list exposed by the loaded model. If this Synthesizer
# object has no `speakers` attribute, keep the static SPEAKERS list defined
# above instead of aborting start-up with AttributeError.
# NOTE(review): confirm which attribute the installed TTS version provides.
SPEAKERS = getattr(model_bsc, "speakers", SPEAKERS)

# NOTE(review): model_collectivat is not referenced by the visible code;
# it is still loaded here to keep the module's globals unchanged.
model_collectivat = carrega_collectivat()
def tts(text, speaker_idx):
    """Synthesize ``text`` with the BSC model and return the path of a WAV file.

    Args:
        text: Text to synthesize. Anything beyond ``MAX_TXT_LEN`` characters
            is dropped and a notice is printed.
        speaker_idx: Speaker identifier, forwarded unchanged to
            ``model_bsc.tts``.

    Returns:
        Path of a temporary ``.wav`` file containing the audio. The file is
        created with ``delete=False`` and is not removed by this function.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # The alias -> id lookup through speaker_map.json is disabled, so the file
    # is no longer opened here: parsing it on every call only produced an
    # unused value and made each request fail when the file was missing.

    # Synthesize.
    wavs = model_bsc.tts(text, speaker_idx)

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path stays readable by the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        model_bsc.save_wav(wavs, fp)
        return fp.name
# Usage steps, passed as the `description` argument of the interface below.
description = (
    "\n"
    "1️⃣ Introdueix el text a sintetitzar.\n"
    "2️⃣ Selecciona una veu en el desplegable.\n"
    "3️⃣ Gaudeix!\n"
)

# Passed as the `article` argument of the interface; intentionally empty.
article = ""
# Input widgets: a free-text box with a sample sentence, and a speaker
# selector populated from SPEAKERS with nothing preselected.
_text_input = gr.inputs.Textbox(
    label="Text",
    default="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
)
_speaker_input = gr.inputs.Dropdown(
    label="Selecciona un parlant",
    choices=SPEAKERS,
    default=None,
)

# Output widget: `tts` returns a file path, hence type="filepath".
_audio_output = gr.outputs.Audio(label="Output", type="filepath")

iface = gr.Interface(
    fn=tts,
    inputs=[_text_input, _speaker_input],
    outputs=_audio_output,
    title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
    description=description,
    article=article,
    allow_flagging="never",
    layout="vertical",
    live=False,
)

# Start the web app without requesting a public share link.
iface.launch(share=False)
|