|
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.2.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.2.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.2 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000 |
|
# Started at Fri Dec 1 15:58:34 UTC 2023 |
|
# |
|
/data2/p280965/tts/espnet/tools/venv/bin/python3 /data2/p280965/tts/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.2.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.2.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.2 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000 |
|
[wieling-3-a100] 2023-12-01 15:58:40,885 (gan_tts:293) INFO: Vocabulary size: 46 |
|
[wieling-3-a100] 2023-12-01 15:58:41,134 (encoder:174) INFO: encoder self-attention layer type = relative self-attention |
|
/data2/p280965/tts/espnet/tools/venv/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. |
|
warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") |
|
/data2/p280965/tts/espnet/espnet2/gan_tts/vits/monotonic_align/__init__.py:19: UserWarning: Cython version is not available. Fallback to 'EXPERIMETAL' numba version. If you want to use the cython version, please build it as follows: `cd espnet2/gan_tts/vits/monotonic_align; python setup.py build_ext --inplace` |
|
warnings.warn( |
|
[wieling-3-a100] 2023-12-01 15:58:43,055 (abs_task:1268) INFO: pytorch.version=2.1.0+cu121, cuda.available=True, cudnn.version=8902, cudnn.benchmark=False, cudnn.deterministic=False |
|
[wieling-3-a100] 2023-12-01 15:58:43,087 (abs_task:1269) INFO: Model structure: |
|
ESPnetGANTTSModel( |
|
(feats_extract): LogMelFbank( |
|
(stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True) |
|
(logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=80, fmax=7600, htk=False) |
|
) |
|
(tts): VITS( |
|
(generator): VITSGenerator( |
|
(text_encoder): TextEncoder( |
|
(emb): Embedding(46, 192) |
|
(encoder): Encoder( |
|
(embed): Sequential( |
|
(0): RelPositionalEncoding( |
|
(dropout): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
(encoders): MultiSequential( |
|
(0): EncoderLayer( |
|
(self_attn): RelPositionMultiHeadedAttention( |
|
(linear_q): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_k): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_v): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_out): Linear(in_features=192, out_features=192, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(linear_pos): Linear(in_features=192, out_features=192, bias=False) |
|
) |
|
(feed_forward): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(feed_forward_macaron): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(1): EncoderLayer( |
|
(self_attn): RelPositionMultiHeadedAttention( |
|
(linear_q): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_k): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_v): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_out): Linear(in_features=192, out_features=192, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(linear_pos): Linear(in_features=192, out_features=192, bias=False) |
|
) |
|
(feed_forward): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(feed_forward_macaron): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(2): EncoderLayer( |
|
(self_attn): RelPositionMultiHeadedAttention( |
|
(linear_q): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_k): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_v): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_out): Linear(in_features=192, out_features=192, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(linear_pos): Linear(in_features=192, out_features=192, bias=False) |
|
) |
|
(feed_forward): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(feed_forward_macaron): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(3): EncoderLayer( |
|
(self_attn): RelPositionMultiHeadedAttention( |
|
(linear_q): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_k): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_v): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_out): Linear(in_features=192, out_features=192, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(linear_pos): Linear(in_features=192, out_features=192, bias=False) |
|
) |
|
(feed_forward): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(feed_forward_macaron): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(4): EncoderLayer( |
|
(self_attn): RelPositionMultiHeadedAttention( |
|
(linear_q): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_k): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_v): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_out): Linear(in_features=192, out_features=192, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(linear_pos): Linear(in_features=192, out_features=192, bias=False) |
|
) |
|
(feed_forward): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(feed_forward_macaron): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(5): EncoderLayer( |
|
(self_attn): RelPositionMultiHeadedAttention( |
|
(linear_q): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_k): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_v): Linear(in_features=192, out_features=192, bias=True) |
|
(linear_out): Linear(in_features=192, out_features=192, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(linear_pos): Linear(in_features=192, out_features=192, bias=False) |
|
) |
|
(feed_forward): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(feed_forward_macaron): MultiLayeredConv1d( |
|
(w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(after_norm): LayerNorm((192,), eps=1e-12, elementwise_affine=True) |
|
) |
|
(proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
(decoder): HiFiGANGenerator( |
|
(input_conv): Conv1d(192, 512, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
(upsamples): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,)) |
|
) |
|
(3): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,)) |
|
) |
|
) |
|
(blocks): ModuleList( |
|
(0): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
) |
|
) |
|
(1): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
) |
|
) |
|
(2): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
) |
|
) |
|
(3): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
) |
|
) |
|
(4): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
) |
|
) |
|
(5): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
) |
|
) |
|
(6): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
) |
|
) |
|
(7): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
) |
|
) |
|
(8): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
) |
|
) |
|
(9): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
) |
|
) |
|
(10): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
) |
|
) |
|
) |
|
(11): ResidualBlock( |
|
(convs1): ModuleList( |
|
(0): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
(1): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,)) |
|
) |
|
(2): Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,)) |
|
) |
|
) |
|
(convs2): ModuleList( |
|
(0-2): 3 x Sequential( |
|
(0): LeakyReLU(negative_slope=0.1) |
|
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,)) |
|
) |
|
) |
|
) |
|
) |
|
(output_conv): Sequential( |
|
(0): LeakyReLU(negative_slope=0.01) |
|
(1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,)) |
|
(2): Tanh() |
|
) |
|
(global_conv): Conv1d(256, 512, kernel_size=(1,), stride=(1,)) |
|
) |
|
(posterior_encoder): PosteriorEncoder( |
|
(input_conv): Conv1d(80, 192, kernel_size=(1,), stride=(1,)) |
|
(encoder): WaveNet( |
|
(conv_layers): ModuleList( |
|
(0-15): 16 x ResidualBlock( |
|
(conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,)) |
|
(conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False) |
|
(conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
(flow): ResidualAffineCouplingBlock( |
|
(flows): ModuleList( |
|
(0): ResidualAffineCouplingLayer( |
|
(input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,)) |
|
(encoder): WaveNet( |
|
(conv_layers): ModuleList( |
|
(0-3): 4 x ResidualBlock( |
|
(conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,)) |
|
(conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False) |
|
(conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,)) |
|
) |
|
(1): FlipFlow() |
|
(2): ResidualAffineCouplingLayer( |
|
(input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,)) |
|
(encoder): WaveNet( |
|
(conv_layers): ModuleList( |
|
(0-3): 4 x ResidualBlock( |
|
(conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,)) |
|
(conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False) |
|
(conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,)) |
|
) |
|
(3): FlipFlow() |
|
(4): ResidualAffineCouplingLayer( |
|
(input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,)) |
|
(encoder): WaveNet( |
|
(conv_layers): ModuleList( |
|
(0-3): 4 x ResidualBlock( |
|
(conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,)) |
|
(conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False) |
|
(conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,)) |
|
) |
|
(5): FlipFlow() |
|
(6): ResidualAffineCouplingLayer( |
|
(input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,)) |
|
(encoder): WaveNet( |
|
(conv_layers): ModuleList( |
|
(0-3): 4 x ResidualBlock( |
|
(conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,)) |
|
(conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False) |
|
(conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,)) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,)) |
|
) |
|
(7): FlipFlow() |
|
) |
|
) |
|
(duration_predictor): StochasticDurationPredictor( |
|
(pre): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(dds): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.5, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.5, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.5, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(log_flow): LogFlow() |
|
(flows): ModuleList( |
|
(0): ElementwiseAffineFlow() |
|
(1): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(2): FlipFlow() |
|
(3): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(4): FlipFlow() |
|
(5): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(6): FlipFlow() |
|
(7): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(8): FlipFlow() |
|
) |
|
(post_pre): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(post_dds): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.5, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.5, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.5, inplace=False) |
|
) |
|
) |
|
) |
|
(post_proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(post_flows): ModuleList( |
|
(0): ElementwiseAffineFlow() |
|
(1): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(2): FlipFlow() |
|
(3): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(4): FlipFlow() |
|
(5): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(6): FlipFlow() |
|
(7): ConvFlow( |
|
(input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,)) |
|
(dds_conv): DilatedDepthSeparableConv( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192) |
|
(1): Transpose() |
|
(2): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(3): Transpose() |
|
(4): GELU(approximate='none') |
|
(5): Conv1d(192, 192, kernel_size=(1,), stride=(1,)) |
|
(6): Transpose() |
|
(7): LayerNorm((192,), eps=1e-05, elementwise_affine=True) |
|
(8): Transpose() |
|
(9): GELU(approximate='none') |
|
(10): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
) |
|
(proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,)) |
|
) |
|
(8): FlipFlow() |
|
) |
|
(global_conv): Conv1d(256, 192, kernel_size=(1,), stride=(1,)) |
|
) |
|
(global_emb): Embedding(4, 256) |
|
) |
|
(discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator( |
|
(msd): HiFiGANMultiScaleDiscriminator( |
|
(discriminators): ModuleList( |
|
(0): HiFiGANScaleDiscriminator( |
|
(layers): ModuleList( |
|
(0): Sequential( |
|
(0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(1): Sequential( |
|
(0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(2): Sequential( |
|
(0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(3): Sequential( |
|
(0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(4): Sequential( |
|
(0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(5): Sequential( |
|
(0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(6): Sequential( |
|
(0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,)) |
|
) |
|
) |
|
) |
|
) |
|
(mpd): HiFiGANMultiPeriodDiscriminator( |
|
(discriminators): ModuleList( |
|
(0-4): 5 x HiFiGANPeriodDiscriminator( |
|
(convs): ModuleList( |
|
(0): Sequential( |
|
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(1): Sequential( |
|
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(2): Sequential( |
|
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(3): Sequential( |
|
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
(4): Sequential( |
|
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0)) |
|
(1): LeakyReLU(negative_slope=0.1) |
|
) |
|
) |
|
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0)) |
|
) |
|
) |
|
) |
|
) |
|
(generator_adv_loss): GeneratorAdversarialLoss() |
|
(discriminator_adv_loss): DiscriminatorAdversarialLoss() |
|
(feat_match_loss): FeatureMatchLoss() |
|
(mel_loss): MelSpectrogramLoss( |
|
(wav_to_mel): LogMelFbank( |
|
(stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True) |
|
(logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=11025.0, htk=False) |
|
) |
|
) |
|
(kl_loss): KLDivergenceLoss() |
|
) |
|
) |
|
|
|
Model summary: |
|
Class Name: ESPnetGANTTSModel |
|
Total Number of model parameters: 96.24 M |
|
Number of trainable parameters: 96.24 M (100.0%) |
|
Size: 384.96 MB |
|
Type: torch.float32 |
|
[wieling-3-a100] 2023-12-01 15:58:43,087 (abs_task:1272) INFO: Optimizer: |
|
AdamW ( |
|
Parameter Group 0 |
|
amsgrad: False |
|
betas: [0.8, 0.99] |
|
capturable: False |
|
differentiable: False |
|
eps: 1e-09 |
|
foreach: None |
|
fused: None |
|
initial_lr: 0.0003 |
|
lr: 0.0003 |
|
maximize: False |
|
weight_decay: 0.0 |
|
) |
|
[wieling-3-a100] 2023-12-01 15:58:43,087 (abs_task:1273) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f51a53248b0> |
|
[wieling-3-a100] 2023-12-01 15:58:43,087 (abs_task:1272) INFO: Optimizer2: |
|
AdamW ( |
|
Parameter Group 0 |
|
amsgrad: False |
|
betas: [0.8, 0.99] |
|
capturable: False |
|
differentiable: False |
|
eps: 1e-09 |
|
foreach: None |
|
fused: None |
|
initial_lr: 0.0003 |
|
lr: 0.0003 |
|
maximize: False |
|
weight_decay: 0.0 |
|
) |
|
[wieling-3-a100] 2023-12-01 15:58:43,087 (abs_task:1273) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f51a5324850> |
|
[wieling-3-a100] 2023-12-01 15:58:43,087 (abs_task:1282) INFO: Saving the configuration in exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.2/config.yaml |
|
[wieling-3-a100] 2023-12-01 15:58:43,141 (abs_task:1293) INFO: Namespace(config='conf/train_vits.yaml', print_config=False, log_level='INFO', drop_last_iter=False, dry_run=False, iterator_type='sequence', valid_iterator_type=None, output_dir='exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.2', ngpu=0, seed=67823, num_workers=4, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=True, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=False, collect_stats=True, write_collected_feats=False, max_epoch=1000, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[['train', 'total_count', 'max']], keep_nbest_models=10, nbest_averaging_interval=0, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=50, use_matplotlib=True, use_tensorboard=True, create_graph_in_tensorboard=False, use_wandb=True, wandb_project='GROTTS', wandb_id=None, wandb_entity=None, wandb_name='VITS_lr_3.0e-4', wandb_model_log_interval=-1, detect_anomaly=False, use_lora=False, save_lora_only=True, lora_conf={}, pretrain_path=None, init_param=['downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv'], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=1000, batch_size=40, valid_batch_size=None, batch_bins=10000000, valid_batch_bins=None, train_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.2.scp'], valid_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.2.scp'], batch_type='numel', valid_batch_type=None, fold_length=[], sort_in_batch='descending', shuffle_within_batch=False, sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, chunk_excluded_key_prefixes=[], chunk_default_fs=None, train_data_path_and_name_and_type=[('dump/raw/train_nodev/text', 'text', 'text'), ('dump/raw/train_nodev/wav.scp', 'speech', 'sound'), ('dump/raw/train_nodev/utt2sid', 'sids', 'text_int')], valid_data_path_and_name_and_type=[('dump/raw/train_dev/text', 'text', 'text'), ('dump/raw/train_dev/wav.scp', 'speech', 'sound'), ('dump/raw/train_dev/utt2sid', 'sids', 'text_int')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, allow_multi_rates=False, valid_max_cache_size=None, exclude_weight_decay=False, exclude_weight_decay_conf={}, optim='adamw', optim_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler='exponentiallr', scheduler_conf={'gamma': 0.999875}, optim2='adamw', optim2_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, generator_first=False, token_list=['<blank>', '<unk>', '<space>', 'e', 'n', 'a', 'o', 't', 'i', 'r', 'd', 's', 'k', 'l', 'm', 'u', 'g', 'h', 'w', 'v', '.', 'z', 'b', 'p', ',', 'j', 'c', 'f', '‘', '’', ':', '?', 'ö', "'", '!', '-', ';', 'ò', 'è', 'ì', 'é', 'y', 'ë', 'x', 'q', '<sos/eos>'], odim=None, model_conf={}, use_preprocessor=True, token_type='char', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, feats_extract='fbank', feats_extract_conf={'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'fs': 22050, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, normalize=None, normalize_conf={}, tts='vits', tts_conf={'generator_type': 'vits_generator', 'generator_params': {'hidden_channels': 192, 'spks': 4, 'global_channels': 256, 'segment_size': 32, 'text_encoder_attention_heads': 2, 'text_encoder_ffn_expand': 4, 'text_encoder_blocks': 6, 'text_encoder_positionwise_layer_type': 'conv1d', 'text_encoder_positionwise_conv_kernel_size': 3, 'text_encoder_positional_encoding_layer_type': 'rel_pos', 'text_encoder_self_attention_layer_type': 'rel_selfattn', 'text_encoder_activation_type': 'swish', 'text_encoder_normalize_before': True, 'text_encoder_dropout_rate': 0.1, 'text_encoder_positional_dropout_rate': 0.0, 'text_encoder_attention_dropout_rate': 0.1, 'use_macaron_style_in_text_encoder': True, 'use_conformer_conv_in_text_encoder': False, 'text_encoder_conformer_kernel_size': -1, 'decoder_kernel_size': 7, 'decoder_channels': 512, 'decoder_upsample_scales': [8, 8, 2, 2], 'decoder_upsample_kernel_sizes': [16, 16, 4, 4], 'decoder_resblock_kernel_sizes': [3, 7, 11], 'decoder_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'use_weight_norm_in_decoder': True, 'posterior_encoder_kernel_size': 5, 'posterior_encoder_layers': 16, 'posterior_encoder_stacks': 1, 'posterior_encoder_base_dilation': 1, 'posterior_encoder_dropout_rate': 0.0, 'use_weight_norm_in_posterior_encoder': True, 'flow_flows': 4, 'flow_kernel_size': 5, 'flow_base_dilation': 1, 'flow_layers': 4, 'flow_dropout_rate': 0.0, 'use_weight_norm_in_flow': True, 'use_only_mean_in_flow': True, 'stochastic_duration_predictor_kernel_size': 3, 'stochastic_duration_predictor_dropout_rate': 0.5, 'stochastic_duration_predictor_flows': 4, 'stochastic_duration_predictor_dds_conv_layers': 3, 'vocabs': 46, 'aux_channels': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': False, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_dur': 1.0, 'lambda_kl': 1.0, 'sampling_rate': 22050, 'cache_generator_outputs': True}, pitch_extract=None, pitch_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, energy_extract=None, energy_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None}, energy_normalize=None, energy_normalize_conf={}, required=['output_dir', 'token_list'], version='202310', distributed=False) |
|
# Accounting: time=17 threads=1 |
|
# Ended (code 0) at Fri Dec 1 15:58:51 UTC 2023, elapsed time 17 seconds |
|
|