File size: 1,320 Bytes
8582fd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import torch
import torchaudio
from speechbrain.inference.TTS import Tacotron2
from speechbrain.inference.vocoders import HIFIGAN
from speechbrain.inference.TTS import MSTacotron2
#%%

def TTS(INPUT_TEXT: str, CHOİCE: str) -> None:
    """Synthesize ``INPUT_TEXT`` to ``Output/base-TTS.wav`` using the chosen voice.

    Only the models required by the selected voice are loaded:

    * ``"Female"`` uses ``speechbrain/tts-tacotron2-ljspeech`` together with
      the ``speechbrain/tts-hifigan-ljspeech`` vocoder.
    * ``"Male"`` uses ``speechbrain/tts-mstacotron2-libritts`` to clone the
      voice in ``Voice Samples/natural_m.wav`` together with the
      ``speechbrain/tts-hifigan-libritts-22050Hz`` vocoder.

    Args:
        INPUT_TEXT: Text to be spoken.
        CHOİCE: Voice selector; must be exactly ``"Female"`` or ``"Male"``.

    Returns:
        None. The audio is written to ``Output/base-TTS.wav`` with a sample
        rate of 22050 passed to ``torchaudio.save``.

    Raises:
        ValueError: If ``CHOİCE`` is not one of the supported voices.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    from pathlib import Path

    # Validate before any model is loaded so that a bad selector fails fast
    # instead of loading models and then silently producing no audio.
    if CHOİCE not in ("Female", "Male"):
        raise ValueError(
            f"Unsupported voice choice {CHOİCE!r}; expected 'Female' or 'Male'."
        )

    output_file = "Output/base-TTS.wav"
    sample_rate = 22050

    # torchaudio.save does not create missing parent directories.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    if CHOİCE == "Female":
        tacotron2 = Tacotron2.from_hparams(
            source="speechbrain/tts-tacotron2-ljspeech",
            savedir="tmpdir_tts",
        )
        hifi_gan = HIFIGAN.from_hparams(
            source="speechbrain/tts-hifigan-ljspeech",
            savedir="tmpdir_vocoder",
        )
        mel_output, _mel_length, _alignment = tacotron2.encode_text(INPUT_TEXT)
        waveforms = hifi_gan.decode_batch(mel_output)
        torchaudio.save(output_file, waveforms.squeeze(1), sample_rate)
    else:  # CHOİCE == "Male"
        ms_tacotron2 = MSTacotron2.from_hparams(
            source="speechbrain/tts-mstacotron2-libritts",
            savedir="pretrained_models/tts-mstacotron2-libritts",
        )
        hifi_gan = HIFIGAN.from_hparams(
            source="speechbrain/tts-hifigan-libritts-22050Hz",
            savedir="pretrained_models/tts-hifigan-libritts-22050Hz",
        )
        reference_speech = "Voice Samples/natural_m.wav"
        mel_outputs, _mel_lengths, _alignments = ms_tacotron2.clone_voice(
            INPUT_TEXT, reference_speech
        )
        waveforms = hifi_gan.decode_batch(mel_outputs)
        torchaudio.save(output_file, waveforms[0], sample_rate)