speechbrain
/

tts-tacotron2-ljspeech

speech-synthesis

Model card Files Files and versions Community

tts-tacotron2-ljspeech / hyperparams.yaml

Mirco's picture

model upload

cd2eec6 over 2 years ago

2.19 kB

	# ################################
	# Model: Tacotron2 for TTS
	# Authors: Artem Ploujnikov, Yingzhi Wang
	# ################################

	# Scalar hyperparameters. Each value is referenced elsewhere in this file
	# through `!ref <name>`, so a value only needs to be changed here.

	# general
	mask_padding: true
	n_mel_channels: 80
	# symbols
	n_symbols: 148
	symbols_embedding_dim: 512
	# encoder
	encoder_kernel_size: 5
	encoder_n_convolutions: 3
	encoder_embedding_dim: 512
	# attention
	attention_rnn_dim: 1024
	attention_dim: 128
	# attention location
	attention_location_n_filters: 32
	attention_location_kernel_size: 31
	# decoder
	n_frames_per_step: 1
	decoder_rnn_dim: 1024
	prenet_dim: 256
	max_decoder_steps: 1000
	gate_threshold: 0.5
	p_attention_dropout: 0.1
	p_decoder_dropout: 0.1
	# postnet
	postnet_embedding_dim: 512
	postnet_kernel_size: 5
	postnet_n_convolutions: 5
	# early stopping
	decoder_no_early_stopping: false

	# Model
	# `!new:` instantiates the class at the given dotted path; the mapping
	# nested beneath it supplies the constructor's keyword arguments. Every
	# argument is a `!ref` to the top-level hyperparameter of the same name,
	# so the arguments MUST stay indented under `model:` -- at the top level
	# they would become duplicate keys that refer to themselves.
	model: !new:speechbrain.lobes.models.Tacotron2.Tacotron2
	  mask_padding: !ref <mask_padding>
	  n_mel_channels: !ref <n_mel_channels>
	  # symbols
	  n_symbols: !ref <n_symbols>
	  symbols_embedding_dim: !ref <symbols_embedding_dim>
	  # encoder
	  encoder_kernel_size: !ref <encoder_kernel_size>
	  encoder_n_convolutions: !ref <encoder_n_convolutions>
	  encoder_embedding_dim: !ref <encoder_embedding_dim>
	  # attention
	  attention_rnn_dim: !ref <attention_rnn_dim>
	  attention_dim: !ref <attention_dim>
	  # attention location
	  attention_location_n_filters: !ref <attention_location_n_filters>
	  attention_location_kernel_size: !ref <attention_location_kernel_size>
	  # decoder
	  n_frames_per_step: !ref <n_frames_per_step>
	  decoder_rnn_dim: !ref <decoder_rnn_dim>
	  prenet_dim: !ref <prenet_dim>
	  max_decoder_steps: !ref <max_decoder_steps>
	  gate_threshold: !ref <gate_threshold>
	  p_attention_dropout: !ref <p_attention_dropout>
	  p_decoder_dropout: !ref <p_decoder_dropout>
	  # postnet
	  postnet_embedding_dim: !ref <postnet_embedding_dim>
	  postnet_kernel_size: !ref <postnet_kernel_size>
	  postnet_n_convolutions: !ref <postnet_n_convolutions>
	  # early stopping
	  decoder_no_early_stopping: !ref <decoder_no_early_stopping>

	# Function that converts the text into a sequence of valid characters.
	# `!name:` stores a reference to the callable at this dotted path rather
	# than calling it while the file is loaded; the caller invokes it later.
	# NOTE(review): the path points into the `recipes` package, so that
	# package presumably has to be importable at load time -- confirm.
	text_to_sequence: !name:recipes.LJSpeech.TTS.tacotron2.text_to_sequence.text_to_sequence

	# Named modules exposed by this file. `model` has to be nested under
	# `modules:`; left at the top level, `modules` would be null and `model`
	# would be redefined as a reference to itself.
	modules:
	  model: !ref <model>

	# Pretrainer instance. `loadables` is its constructor argument: a mapping
	# from a name to the object handed to the Pretrainer -- here only the
	# model. Both levels of nesting are required; flattened, the Pretrainer
	# would be built with no arguments at all.
	pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
	  loadables:
	    model: !ref <model>