bartelds commited on
Commit
eb64913
·
1 Parent(s): 9c87244

Delete exp-vits-lr-3e-4

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1.log +0 -1152
  2. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/config.yaml +0 -383
  3. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/batch_keys +0 -3
  4. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/feats_lengths_stats.npz +0 -3
  5. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/feats_stats.npz +0 -3
  6. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/sids_shape +0 -249
  7. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/speech_shape +0 -249
  8. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/stats_keys +0 -2
  9. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/text_shape +0 -249
  10. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/batch_keys +0 -3
  11. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/feats_lengths_stats.npz +0 -3
  12. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/feats_stats.npz +0 -3
  13. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/sids_shape +0 -5
  14. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/speech_shape +0 -5
  15. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/stats_keys +0 -2
  16. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/text_shape +0 -5
  17. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10.log +0 -1152
  18. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/config.yaml +0 -383
  19. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/batch_keys +0 -3
  20. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/feats_lengths_stats.npz +0 -3
  21. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/feats_stats.npz +0 -3
  22. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/sids_shape +0 -249
  23. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/speech_shape +0 -249
  24. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/stats_keys +0 -2
  25. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/text_shape +0 -249
  26. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/batch_keys +0 -3
  27. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/feats_lengths_stats.npz +0 -3
  28. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/feats_stats.npz +0 -3
  29. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/sids_shape +0 -5
  30. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/speech_shape +0 -5
  31. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/stats_keys +0 -2
  32. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/text_shape +0 -5
  33. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11.log +0 -1152
  34. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/config.yaml +0 -383
  35. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/batch_keys +0 -3
  36. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/feats_lengths_stats.npz +0 -3
  37. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/feats_stats.npz +0 -3
  38. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/sids_shape +0 -249
  39. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/speech_shape +0 -249
  40. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/stats_keys +0 -2
  41. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/text_shape +0 -249
  42. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/batch_keys +0 -3
  43. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/feats_lengths_stats.npz +0 -3
  44. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/feats_stats.npz +0 -3
  45. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/sids_shape +0 -5
  46. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/speech_shape +0 -5
  47. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/stats_keys +0 -2
  48. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/text_shape +0 -5
  49. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12.log +0 -1152
  50. exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12/config.yaml +0 -383
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1.log DELETED
@@ -1,1152 +0,0 @@
1
- # python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.1.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.1.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
2
- # Started at Fri Dec 1 15:58:34 UTC 2023
3
- #
4
- /data2/p280965/tts/espnet/tools/venv/bin/python3 /data2/p280965/tts/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.1.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.1.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
5
- [wieling-3-a100] 2023-12-01 15:58:40,398 (gan_tts:293) INFO: Vocabulary size: 46
6
- [wieling-3-a100] 2023-12-01 15:58:40,545 (encoder:174) INFO: encoder self-attention layer type = relative self-attention
7
- /data2/p280965/tts/espnet/tools/venv/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
8
- warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
9
- /data2/p280965/tts/espnet/espnet2/gan_tts/vits/monotonic_align/__init__.py:19: UserWarning: Cython version is not available. Fallback to 'EXPERIMETAL' numba version. If you want to use the cython version, please build it as follows: `cd espnet2/gan_tts/vits/monotonic_align; python setup.py build_ext --inplace`
10
- warnings.warn(
11
- [wieling-3-a100] 2023-12-01 15:58:41,774 (abs_task:1268) INFO: pytorch.version=2.1.0+cu121, cuda.available=True, cudnn.version=8902, cudnn.benchmark=False, cudnn.deterministic=False
12
- [wieling-3-a100] 2023-12-01 15:58:41,789 (abs_task:1269) INFO: Model structure:
13
- ESPnetGANTTSModel(
14
- (feats_extract): LogMelFbank(
15
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
16
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=80, fmax=7600, htk=False)
17
- )
18
- (tts): VITS(
19
- (generator): VITSGenerator(
20
- (text_encoder): TextEncoder(
21
- (emb): Embedding(46, 192)
22
- (encoder): Encoder(
23
- (embed): Sequential(
24
- (0): RelPositionalEncoding(
25
- (dropout): Dropout(p=0.0, inplace=False)
26
- )
27
- )
28
- (encoders): MultiSequential(
29
- (0): EncoderLayer(
30
- (self_attn): RelPositionMultiHeadedAttention(
31
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
32
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
33
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
34
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
35
- (dropout): Dropout(p=0.1, inplace=False)
36
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
37
- )
38
- (feed_forward): MultiLayeredConv1d(
39
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
40
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
41
- (dropout): Dropout(p=0.1, inplace=False)
42
- )
43
- (feed_forward_macaron): MultiLayeredConv1d(
44
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
45
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
46
- (dropout): Dropout(p=0.1, inplace=False)
47
- )
48
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
49
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
50
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
51
- (dropout): Dropout(p=0.1, inplace=False)
52
- )
53
- (1): EncoderLayer(
54
- (self_attn): RelPositionMultiHeadedAttention(
55
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
56
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
57
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
58
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
59
- (dropout): Dropout(p=0.1, inplace=False)
60
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
61
- )
62
- (feed_forward): MultiLayeredConv1d(
63
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
64
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
65
- (dropout): Dropout(p=0.1, inplace=False)
66
- )
67
- (feed_forward_macaron): MultiLayeredConv1d(
68
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
69
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
70
- (dropout): Dropout(p=0.1, inplace=False)
71
- )
72
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
73
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
74
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
75
- (dropout): Dropout(p=0.1, inplace=False)
76
- )
77
- (2): EncoderLayer(
78
- (self_attn): RelPositionMultiHeadedAttention(
79
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
80
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
81
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
82
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
83
- (dropout): Dropout(p=0.1, inplace=False)
84
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
85
- )
86
- (feed_forward): MultiLayeredConv1d(
87
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
88
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
89
- (dropout): Dropout(p=0.1, inplace=False)
90
- )
91
- (feed_forward_macaron): MultiLayeredConv1d(
92
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
93
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
94
- (dropout): Dropout(p=0.1, inplace=False)
95
- )
96
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
97
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
98
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
99
- (dropout): Dropout(p=0.1, inplace=False)
100
- )
101
- (3): EncoderLayer(
102
- (self_attn): RelPositionMultiHeadedAttention(
103
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
104
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
105
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
106
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
107
- (dropout): Dropout(p=0.1, inplace=False)
108
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
109
- )
110
- (feed_forward): MultiLayeredConv1d(
111
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
112
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
113
- (dropout): Dropout(p=0.1, inplace=False)
114
- )
115
- (feed_forward_macaron): MultiLayeredConv1d(
116
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
117
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
118
- (dropout): Dropout(p=0.1, inplace=False)
119
- )
120
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
121
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
122
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
123
- (dropout): Dropout(p=0.1, inplace=False)
124
- )
125
- (4): EncoderLayer(
126
- (self_attn): RelPositionMultiHeadedAttention(
127
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
128
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
129
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
130
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
131
- (dropout): Dropout(p=0.1, inplace=False)
132
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
133
- )
134
- (feed_forward): MultiLayeredConv1d(
135
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
136
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
137
- (dropout): Dropout(p=0.1, inplace=False)
138
- )
139
- (feed_forward_macaron): MultiLayeredConv1d(
140
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
141
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
142
- (dropout): Dropout(p=0.1, inplace=False)
143
- )
144
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
145
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
146
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
147
- (dropout): Dropout(p=0.1, inplace=False)
148
- )
149
- (5): EncoderLayer(
150
- (self_attn): RelPositionMultiHeadedAttention(
151
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
152
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
153
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
154
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
155
- (dropout): Dropout(p=0.1, inplace=False)
156
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
157
- )
158
- (feed_forward): MultiLayeredConv1d(
159
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
160
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
161
- (dropout): Dropout(p=0.1, inplace=False)
162
- )
163
- (feed_forward_macaron): MultiLayeredConv1d(
164
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
165
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
166
- (dropout): Dropout(p=0.1, inplace=False)
167
- )
168
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
169
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
170
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
171
- (dropout): Dropout(p=0.1, inplace=False)
172
- )
173
- )
174
- (after_norm): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
175
- )
176
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
177
- )
178
- (decoder): HiFiGANGenerator(
179
- (input_conv): Conv1d(192, 512, kernel_size=(7,), stride=(1,), padding=(3,))
180
- (upsamples): ModuleList(
181
- (0): Sequential(
182
- (0): LeakyReLU(negative_slope=0.1)
183
- (1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
184
- )
185
- (1): Sequential(
186
- (0): LeakyReLU(negative_slope=0.1)
187
- (1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
188
- )
189
- (2): Sequential(
190
- (0): LeakyReLU(negative_slope=0.1)
191
- (1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
192
- )
193
- (3): Sequential(
194
- (0): LeakyReLU(negative_slope=0.1)
195
- (1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
196
- )
197
- )
198
- (blocks): ModuleList(
199
- (0): ResidualBlock(
200
- (convs1): ModuleList(
201
- (0): Sequential(
202
- (0): LeakyReLU(negative_slope=0.1)
203
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
204
- )
205
- (1): Sequential(
206
- (0): LeakyReLU(negative_slope=0.1)
207
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
208
- )
209
- (2): Sequential(
210
- (0): LeakyReLU(negative_slope=0.1)
211
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
212
- )
213
- )
214
- (convs2): ModuleList(
215
- (0-2): 3 x Sequential(
216
- (0): LeakyReLU(negative_slope=0.1)
217
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
218
- )
219
- )
220
- )
221
- (1): ResidualBlock(
222
- (convs1): ModuleList(
223
- (0): Sequential(
224
- (0): LeakyReLU(negative_slope=0.1)
225
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
226
- )
227
- (1): Sequential(
228
- (0): LeakyReLU(negative_slope=0.1)
229
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
230
- )
231
- (2): Sequential(
232
- (0): LeakyReLU(negative_slope=0.1)
233
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
234
- )
235
- )
236
- (convs2): ModuleList(
237
- (0-2): 3 x Sequential(
238
- (0): LeakyReLU(negative_slope=0.1)
239
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
240
- )
241
- )
242
- )
243
- (2): ResidualBlock(
244
- (convs1): ModuleList(
245
- (0): Sequential(
246
- (0): LeakyReLU(negative_slope=0.1)
247
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
248
- )
249
- (1): Sequential(
250
- (0): LeakyReLU(negative_slope=0.1)
251
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
252
- )
253
- (2): Sequential(
254
- (0): LeakyReLU(negative_slope=0.1)
255
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
256
- )
257
- )
258
- (convs2): ModuleList(
259
- (0-2): 3 x Sequential(
260
- (0): LeakyReLU(negative_slope=0.1)
261
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
262
- )
263
- )
264
- )
265
- (3): ResidualBlock(
266
- (convs1): ModuleList(
267
- (0): Sequential(
268
- (0): LeakyReLU(negative_slope=0.1)
269
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
270
- )
271
- (1): Sequential(
272
- (0): LeakyReLU(negative_slope=0.1)
273
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
274
- )
275
- (2): Sequential(
276
- (0): LeakyReLU(negative_slope=0.1)
277
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
278
- )
279
- )
280
- (convs2): ModuleList(
281
- (0-2): 3 x Sequential(
282
- (0): LeakyReLU(negative_slope=0.1)
283
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
284
- )
285
- )
286
- )
287
- (4): ResidualBlock(
288
- (convs1): ModuleList(
289
- (0): Sequential(
290
- (0): LeakyReLU(negative_slope=0.1)
291
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
292
- )
293
- (1): Sequential(
294
- (0): LeakyReLU(negative_slope=0.1)
295
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
296
- )
297
- (2): Sequential(
298
- (0): LeakyReLU(negative_slope=0.1)
299
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
300
- )
301
- )
302
- (convs2): ModuleList(
303
- (0-2): 3 x Sequential(
304
- (0): LeakyReLU(negative_slope=0.1)
305
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
306
- )
307
- )
308
- )
309
- (5): ResidualBlock(
310
- (convs1): ModuleList(
311
- (0): Sequential(
312
- (0): LeakyReLU(negative_slope=0.1)
313
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
314
- )
315
- (1): Sequential(
316
- (0): LeakyReLU(negative_slope=0.1)
317
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
318
- )
319
- (2): Sequential(
320
- (0): LeakyReLU(negative_slope=0.1)
321
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
322
- )
323
- )
324
- (convs2): ModuleList(
325
- (0-2): 3 x Sequential(
326
- (0): LeakyReLU(negative_slope=0.1)
327
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
328
- )
329
- )
330
- )
331
- (6): ResidualBlock(
332
- (convs1): ModuleList(
333
- (0): Sequential(
334
- (0): LeakyReLU(negative_slope=0.1)
335
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
336
- )
337
- (1): Sequential(
338
- (0): LeakyReLU(negative_slope=0.1)
339
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
340
- )
341
- (2): Sequential(
342
- (0): LeakyReLU(negative_slope=0.1)
343
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
344
- )
345
- )
346
- (convs2): ModuleList(
347
- (0-2): 3 x Sequential(
348
- (0): LeakyReLU(negative_slope=0.1)
349
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
350
- )
351
- )
352
- )
353
- (7): ResidualBlock(
354
- (convs1): ModuleList(
355
- (0): Sequential(
356
- (0): LeakyReLU(negative_slope=0.1)
357
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
358
- )
359
- (1): Sequential(
360
- (0): LeakyReLU(negative_slope=0.1)
361
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
362
- )
363
- (2): Sequential(
364
- (0): LeakyReLU(negative_slope=0.1)
365
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
366
- )
367
- )
368
- (convs2): ModuleList(
369
- (0-2): 3 x Sequential(
370
- (0): LeakyReLU(negative_slope=0.1)
371
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
372
- )
373
- )
374
- )
375
- (8): ResidualBlock(
376
- (convs1): ModuleList(
377
- (0): Sequential(
378
- (0): LeakyReLU(negative_slope=0.1)
379
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
380
- )
381
- (1): Sequential(
382
- (0): LeakyReLU(negative_slope=0.1)
383
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
384
- )
385
- (2): Sequential(
386
- (0): LeakyReLU(negative_slope=0.1)
387
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
388
- )
389
- )
390
- (convs2): ModuleList(
391
- (0-2): 3 x Sequential(
392
- (0): LeakyReLU(negative_slope=0.1)
393
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
394
- )
395
- )
396
- )
397
- (9): ResidualBlock(
398
- (convs1): ModuleList(
399
- (0): Sequential(
400
- (0): LeakyReLU(negative_slope=0.1)
401
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
402
- )
403
- (1): Sequential(
404
- (0): LeakyReLU(negative_slope=0.1)
405
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
406
- )
407
- (2): Sequential(
408
- (0): LeakyReLU(negative_slope=0.1)
409
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
410
- )
411
- )
412
- (convs2): ModuleList(
413
- (0-2): 3 x Sequential(
414
- (0): LeakyReLU(negative_slope=0.1)
415
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
416
- )
417
- )
418
- )
419
- (10): ResidualBlock(
420
- (convs1): ModuleList(
421
- (0): Sequential(
422
- (0): LeakyReLU(negative_slope=0.1)
423
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
424
- )
425
- (1): Sequential(
426
- (0): LeakyReLU(negative_slope=0.1)
427
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
428
- )
429
- (2): Sequential(
430
- (0): LeakyReLU(negative_slope=0.1)
431
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
432
- )
433
- )
434
- (convs2): ModuleList(
435
- (0-2): 3 x Sequential(
436
- (0): LeakyReLU(negative_slope=0.1)
437
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
438
- )
439
- )
440
- )
441
- (11): ResidualBlock(
442
- (convs1): ModuleList(
443
- (0): Sequential(
444
- (0): LeakyReLU(negative_slope=0.1)
445
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
446
- )
447
- (1): Sequential(
448
- (0): LeakyReLU(negative_slope=0.1)
449
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
450
- )
451
- (2): Sequential(
452
- (0): LeakyReLU(negative_slope=0.1)
453
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
454
- )
455
- )
456
- (convs2): ModuleList(
457
- (0-2): 3 x Sequential(
458
- (0): LeakyReLU(negative_slope=0.1)
459
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
460
- )
461
- )
462
- )
463
- )
464
- (output_conv): Sequential(
465
- (0): LeakyReLU(negative_slope=0.01)
466
- (1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
467
- (2): Tanh()
468
- )
469
- (global_conv): Conv1d(256, 512, kernel_size=(1,), stride=(1,))
470
- )
471
- (posterior_encoder): PosteriorEncoder(
472
- (input_conv): Conv1d(80, 192, kernel_size=(1,), stride=(1,))
473
- (encoder): WaveNet(
474
- (conv_layers): ModuleList(
475
- (0-15): 16 x ResidualBlock(
476
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
477
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
478
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
479
- )
480
- )
481
- )
482
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
483
- )
484
- (flow): ResidualAffineCouplingBlock(
485
- (flows): ModuleList(
486
- (0): ResidualAffineCouplingLayer(
487
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
488
- (encoder): WaveNet(
489
- (conv_layers): ModuleList(
490
- (0-3): 4 x ResidualBlock(
491
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
492
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
493
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
494
- )
495
- )
496
- )
497
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
498
- )
499
- (1): FlipFlow()
500
- (2): ResidualAffineCouplingLayer(
501
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
502
- (encoder): WaveNet(
503
- (conv_layers): ModuleList(
504
- (0-3): 4 x ResidualBlock(
505
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
506
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
507
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
508
- )
509
- )
510
- )
511
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
512
- )
513
- (3): FlipFlow()
514
- (4): ResidualAffineCouplingLayer(
515
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
516
- (encoder): WaveNet(
517
- (conv_layers): ModuleList(
518
- (0-3): 4 x ResidualBlock(
519
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
520
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
521
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
522
- )
523
- )
524
- )
525
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
526
- )
527
- (5): FlipFlow()
528
- (6): ResidualAffineCouplingLayer(
529
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
530
- (encoder): WaveNet(
531
- (conv_layers): ModuleList(
532
- (0-3): 4 x ResidualBlock(
533
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
534
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
535
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
536
- )
537
- )
538
- )
539
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
540
- )
541
- (7): FlipFlow()
542
- )
543
- )
544
- (duration_predictor): StochasticDurationPredictor(
545
- (pre): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
546
- (dds): DilatedDepthSeparableConv(
547
- (convs): ModuleList(
548
- (0): Sequential(
549
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
550
- (1): Transpose()
551
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
552
- (3): Transpose()
553
- (4): GELU(approximate='none')
554
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
555
- (6): Transpose()
556
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
557
- (8): Transpose()
558
- (9): GELU(approximate='none')
559
- (10): Dropout(p=0.5, inplace=False)
560
- )
561
- (1): Sequential(
562
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
563
- (1): Transpose()
564
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
565
- (3): Transpose()
566
- (4): GELU(approximate='none')
567
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
568
- (6): Transpose()
569
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
570
- (8): Transpose()
571
- (9): GELU(approximate='none')
572
- (10): Dropout(p=0.5, inplace=False)
573
- )
574
- (2): Sequential(
575
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
576
- (1): Transpose()
577
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
578
- (3): Transpose()
579
- (4): GELU(approximate='none')
580
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
581
- (6): Transpose()
582
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
583
- (8): Transpose()
584
- (9): GELU(approximate='none')
585
- (10): Dropout(p=0.5, inplace=False)
586
- )
587
- )
588
- )
589
- (proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
590
- (log_flow): LogFlow()
591
- (flows): ModuleList(
592
- (0): ElementwiseAffineFlow()
593
- (1): ConvFlow(
594
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
595
- (dds_conv): DilatedDepthSeparableConv(
596
- (convs): ModuleList(
597
- (0): Sequential(
598
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
599
- (1): Transpose()
600
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
601
- (3): Transpose()
602
- (4): GELU(approximate='none')
603
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
604
- (6): Transpose()
605
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
606
- (8): Transpose()
607
- (9): GELU(approximate='none')
608
- (10): Dropout(p=0.0, inplace=False)
609
- )
610
- (1): Sequential(
611
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
612
- (1): Transpose()
613
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
614
- (3): Transpose()
615
- (4): GELU(approximate='none')
616
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
617
- (6): Transpose()
618
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
619
- (8): Transpose()
620
- (9): GELU(approximate='none')
621
- (10): Dropout(p=0.0, inplace=False)
622
- )
623
- (2): Sequential(
624
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
625
- (1): Transpose()
626
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
627
- (3): Transpose()
628
- (4): GELU(approximate='none')
629
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
630
- (6): Transpose()
631
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
632
- (8): Transpose()
633
- (9): GELU(approximate='none')
634
- (10): Dropout(p=0.0, inplace=False)
635
- )
636
- )
637
- )
638
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
639
- )
640
- (2): FlipFlow()
641
- (3): ConvFlow(
642
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
643
- (dds_conv): DilatedDepthSeparableConv(
644
- (convs): ModuleList(
645
- (0): Sequential(
646
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
647
- (1): Transpose()
648
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
649
- (3): Transpose()
650
- (4): GELU(approximate='none')
651
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
652
- (6): Transpose()
653
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
654
- (8): Transpose()
655
- (9): GELU(approximate='none')
656
- (10): Dropout(p=0.0, inplace=False)
657
- )
658
- (1): Sequential(
659
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
660
- (1): Transpose()
661
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
662
- (3): Transpose()
663
- (4): GELU(approximate='none')
664
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
665
- (6): Transpose()
666
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
667
- (8): Transpose()
668
- (9): GELU(approximate='none')
669
- (10): Dropout(p=0.0, inplace=False)
670
- )
671
- (2): Sequential(
672
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
673
- (1): Transpose()
674
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
675
- (3): Transpose()
676
- (4): GELU(approximate='none')
677
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
678
- (6): Transpose()
679
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
680
- (8): Transpose()
681
- (9): GELU(approximate='none')
682
- (10): Dropout(p=0.0, inplace=False)
683
- )
684
- )
685
- )
686
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
687
- )
688
- (4): FlipFlow()
689
- (5): ConvFlow(
690
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
691
- (dds_conv): DilatedDepthSeparableConv(
692
- (convs): ModuleList(
693
- (0): Sequential(
694
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
695
- (1): Transpose()
696
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
697
- (3): Transpose()
698
- (4): GELU(approximate='none')
699
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
700
- (6): Transpose()
701
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
702
- (8): Transpose()
703
- (9): GELU(approximate='none')
704
- (10): Dropout(p=0.0, inplace=False)
705
- )
706
- (1): Sequential(
707
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
708
- (1): Transpose()
709
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
710
- (3): Transpose()
711
- (4): GELU(approximate='none')
712
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
713
- (6): Transpose()
714
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
715
- (8): Transpose()
716
- (9): GELU(approximate='none')
717
- (10): Dropout(p=0.0, inplace=False)
718
- )
719
- (2): Sequential(
720
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
721
- (1): Transpose()
722
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
723
- (3): Transpose()
724
- (4): GELU(approximate='none')
725
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
726
- (6): Transpose()
727
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
728
- (8): Transpose()
729
- (9): GELU(approximate='none')
730
- (10): Dropout(p=0.0, inplace=False)
731
- )
732
- )
733
- )
734
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
735
- )
736
- (6): FlipFlow()
737
- (7): ConvFlow(
738
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
739
- (dds_conv): DilatedDepthSeparableConv(
740
- (convs): ModuleList(
741
- (0): Sequential(
742
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
743
- (1): Transpose()
744
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
745
- (3): Transpose()
746
- (4): GELU(approximate='none')
747
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
748
- (6): Transpose()
749
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
750
- (8): Transpose()
751
- (9): GELU(approximate='none')
752
- (10): Dropout(p=0.0, inplace=False)
753
- )
754
- (1): Sequential(
755
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
756
- (1): Transpose()
757
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
758
- (3): Transpose()
759
- (4): GELU(approximate='none')
760
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
761
- (6): Transpose()
762
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
763
- (8): Transpose()
764
- (9): GELU(approximate='none')
765
- (10): Dropout(p=0.0, inplace=False)
766
- )
767
- (2): Sequential(
768
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
769
- (1): Transpose()
770
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
771
- (3): Transpose()
772
- (4): GELU(approximate='none')
773
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
774
- (6): Transpose()
775
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
776
- (8): Transpose()
777
- (9): GELU(approximate='none')
778
- (10): Dropout(p=0.0, inplace=False)
779
- )
780
- )
781
- )
782
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
783
- )
784
- (8): FlipFlow()
785
- )
786
- (post_pre): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
787
- (post_dds): DilatedDepthSeparableConv(
788
- (convs): ModuleList(
789
- (0): Sequential(
790
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
791
- (1): Transpose()
792
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
793
- (3): Transpose()
794
- (4): GELU(approximate='none')
795
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
796
- (6): Transpose()
797
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
798
- (8): Transpose()
799
- (9): GELU(approximate='none')
800
- (10): Dropout(p=0.5, inplace=False)
801
- )
802
- (1): Sequential(
803
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
804
- (1): Transpose()
805
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
806
- (3): Transpose()
807
- (4): GELU(approximate='none')
808
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
809
- (6): Transpose()
810
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
811
- (8): Transpose()
812
- (9): GELU(approximate='none')
813
- (10): Dropout(p=0.5, inplace=False)
814
- )
815
- (2): Sequential(
816
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
817
- (1): Transpose()
818
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
819
- (3): Transpose()
820
- (4): GELU(approximate='none')
821
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
822
- (6): Transpose()
823
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
824
- (8): Transpose()
825
- (9): GELU(approximate='none')
826
- (10): Dropout(p=0.5, inplace=False)
827
- )
828
- )
829
- )
830
- (post_proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
831
- (post_flows): ModuleList(
832
- (0): ElementwiseAffineFlow()
833
- (1): ConvFlow(
834
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
835
- (dds_conv): DilatedDepthSeparableConv(
836
- (convs): ModuleList(
837
- (0): Sequential(
838
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
839
- (1): Transpose()
840
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
841
- (3): Transpose()
842
- (4): GELU(approximate='none')
843
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
844
- (6): Transpose()
845
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
846
- (8): Transpose()
847
- (9): GELU(approximate='none')
848
- (10): Dropout(p=0.0, inplace=False)
849
- )
850
- (1): Sequential(
851
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
852
- (1): Transpose()
853
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
854
- (3): Transpose()
855
- (4): GELU(approximate='none')
856
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
857
- (6): Transpose()
858
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
859
- (8): Transpose()
860
- (9): GELU(approximate='none')
861
- (10): Dropout(p=0.0, inplace=False)
862
- )
863
- (2): Sequential(
864
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
865
- (1): Transpose()
866
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
867
- (3): Transpose()
868
- (4): GELU(approximate='none')
869
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
870
- (6): Transpose()
871
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
872
- (8): Transpose()
873
- (9): GELU(approximate='none')
874
- (10): Dropout(p=0.0, inplace=False)
875
- )
876
- )
877
- )
878
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
879
- )
880
- (2): FlipFlow()
881
- (3): ConvFlow(
882
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
883
- (dds_conv): DilatedDepthSeparableConv(
884
- (convs): ModuleList(
885
- (0): Sequential(
886
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
887
- (1): Transpose()
888
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
889
- (3): Transpose()
890
- (4): GELU(approximate='none')
891
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
892
- (6): Transpose()
893
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
894
- (8): Transpose()
895
- (9): GELU(approximate='none')
896
- (10): Dropout(p=0.0, inplace=False)
897
- )
898
- (1): Sequential(
899
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
900
- (1): Transpose()
901
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
902
- (3): Transpose()
903
- (4): GELU(approximate='none')
904
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
905
- (6): Transpose()
906
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
907
- (8): Transpose()
908
- (9): GELU(approximate='none')
909
- (10): Dropout(p=0.0, inplace=False)
910
- )
911
- (2): Sequential(
912
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
913
- (1): Transpose()
914
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
915
- (3): Transpose()
916
- (4): GELU(approximate='none')
917
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
918
- (6): Transpose()
919
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
920
- (8): Transpose()
921
- (9): GELU(approximate='none')
922
- (10): Dropout(p=0.0, inplace=False)
923
- )
924
- )
925
- )
926
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
927
- )
928
- (4): FlipFlow()
929
- (5): ConvFlow(
930
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
931
- (dds_conv): DilatedDepthSeparableConv(
932
- (convs): ModuleList(
933
- (0): Sequential(
934
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
935
- (1): Transpose()
936
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
937
- (3): Transpose()
938
- (4): GELU(approximate='none')
939
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
940
- (6): Transpose()
941
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
942
- (8): Transpose()
943
- (9): GELU(approximate='none')
944
- (10): Dropout(p=0.0, inplace=False)
945
- )
946
- (1): Sequential(
947
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
948
- (1): Transpose()
949
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
950
- (3): Transpose()
951
- (4): GELU(approximate='none')
952
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
953
- (6): Transpose()
954
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
955
- (8): Transpose()
956
- (9): GELU(approximate='none')
957
- (10): Dropout(p=0.0, inplace=False)
958
- )
959
- (2): Sequential(
960
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
961
- (1): Transpose()
962
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
963
- (3): Transpose()
964
- (4): GELU(approximate='none')
965
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
966
- (6): Transpose()
967
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
968
- (8): Transpose()
969
- (9): GELU(approximate='none')
970
- (10): Dropout(p=0.0, inplace=False)
971
- )
972
- )
973
- )
974
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
975
- )
976
- (6): FlipFlow()
977
- (7): ConvFlow(
978
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
979
- (dds_conv): DilatedDepthSeparableConv(
980
- (convs): ModuleList(
981
- (0): Sequential(
982
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
983
- (1): Transpose()
984
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
985
- (3): Transpose()
986
- (4): GELU(approximate='none')
987
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
988
- (6): Transpose()
989
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
990
- (8): Transpose()
991
- (9): GELU(approximate='none')
992
- (10): Dropout(p=0.0, inplace=False)
993
- )
994
- (1): Sequential(
995
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
996
- (1): Transpose()
997
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
998
- (3): Transpose()
999
- (4): GELU(approximate='none')
1000
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1001
- (6): Transpose()
1002
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1003
- (8): Transpose()
1004
- (9): GELU(approximate='none')
1005
- (10): Dropout(p=0.0, inplace=False)
1006
- )
1007
- (2): Sequential(
1008
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
1009
- (1): Transpose()
1010
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1011
- (3): Transpose()
1012
- (4): GELU(approximate='none')
1013
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1014
- (6): Transpose()
1015
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1016
- (8): Transpose()
1017
- (9): GELU(approximate='none')
1018
- (10): Dropout(p=0.0, inplace=False)
1019
- )
1020
- )
1021
- )
1022
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
1023
- )
1024
- (8): FlipFlow()
1025
- )
1026
- (global_conv): Conv1d(256, 192, kernel_size=(1,), stride=(1,))
1027
- )
1028
- (global_emb): Embedding(4, 256)
1029
- )
1030
- (discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
1031
- (msd): HiFiGANMultiScaleDiscriminator(
1032
- (discriminators): ModuleList(
1033
- (0): HiFiGANScaleDiscriminator(
1034
- (layers): ModuleList(
1035
- (0): Sequential(
1036
- (0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
1037
- (1): LeakyReLU(negative_slope=0.1)
1038
- )
1039
- (1): Sequential(
1040
- (0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
1041
- (1): LeakyReLU(negative_slope=0.1)
1042
- )
1043
- (2): Sequential(
1044
- (0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
1045
- (1): LeakyReLU(negative_slope=0.1)
1046
- )
1047
- (3): Sequential(
1048
- (0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1049
- (1): LeakyReLU(negative_slope=0.1)
1050
- )
1051
- (4): Sequential(
1052
- (0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1053
- (1): LeakyReLU(negative_slope=0.1)
1054
- )
1055
- (5): Sequential(
1056
- (0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
1057
- (1): LeakyReLU(negative_slope=0.1)
1058
- )
1059
- (6): Sequential(
1060
- (0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
1061
- (1): LeakyReLU(negative_slope=0.1)
1062
- )
1063
- (7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
1064
- )
1065
- )
1066
- )
1067
- )
1068
- (mpd): HiFiGANMultiPeriodDiscriminator(
1069
- (discriminators): ModuleList(
1070
- (0-4): 5 x HiFiGANPeriodDiscriminator(
1071
- (convs): ModuleList(
1072
- (0): Sequential(
1073
- (0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1074
- (1): LeakyReLU(negative_slope=0.1)
1075
- )
1076
- (1): Sequential(
1077
- (0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1078
- (1): LeakyReLU(negative_slope=0.1)
1079
- )
1080
- (2): Sequential(
1081
- (0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1082
- (1): LeakyReLU(negative_slope=0.1)
1083
- )
1084
- (3): Sequential(
1085
- (0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1086
- (1): LeakyReLU(negative_slope=0.1)
1087
- )
1088
- (4): Sequential(
1089
- (0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
1090
- (1): LeakyReLU(negative_slope=0.1)
1091
- )
1092
- )
1093
- (output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
1094
- )
1095
- )
1096
- )
1097
- )
1098
- (generator_adv_loss): GeneratorAdversarialLoss()
1099
- (discriminator_adv_loss): DiscriminatorAdversarialLoss()
1100
- (feat_match_loss): FeatureMatchLoss()
1101
- (mel_loss): MelSpectrogramLoss(
1102
- (wav_to_mel): LogMelFbank(
1103
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
1104
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=11025.0, htk=False)
1105
- )
1106
- )
1107
- (kl_loss): KLDivergenceLoss()
1108
- )
1109
- )
1110
-
1111
- Model summary:
1112
- Class Name: ESPnetGANTTSModel
1113
- Total Number of model parameters: 96.24 M
1114
- Number of trainable parameters: 96.24 M (100.0%)
1115
- Size: 384.96 MB
1116
- Type: torch.float32
1117
- [wieling-3-a100] 2023-12-01 15:58:41,789 (abs_task:1272) INFO: Optimizer:
1118
- AdamW (
1119
- Parameter Group 0
1120
- amsgrad: False
1121
- betas: [0.8, 0.99]
1122
- capturable: False
1123
- differentiable: False
1124
- eps: 1e-09
1125
- foreach: None
1126
- fused: None
1127
- initial_lr: 0.0003
1128
- lr: 0.0003
1129
- maximize: False
1130
- weight_decay: 0.0
1131
- )
1132
- [wieling-3-a100] 2023-12-01 15:58:41,789 (abs_task:1273) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7ff08e5c38b0>
1133
- [wieling-3-a100] 2023-12-01 15:58:41,790 (abs_task:1272) INFO: Optimizer2:
1134
- AdamW (
1135
- Parameter Group 0
1136
- amsgrad: False
1137
- betas: [0.8, 0.99]
1138
- capturable: False
1139
- differentiable: False
1140
- eps: 1e-09
1141
- foreach: None
1142
- fused: None
1143
- initial_lr: 0.0003
1144
- lr: 0.0003
1145
- maximize: False
1146
- weight_decay: 0.0
1147
- )
1148
- [wieling-3-a100] 2023-12-01 15:58:41,790 (abs_task:1273) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7ff08e5c3850>
1149
- [wieling-3-a100] 2023-12-01 15:58:41,790 (abs_task:1282) INFO: Saving the configuration in exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/config.yaml
1150
- [wieling-3-a100] 2023-12-01 15:58:41,807 (abs_task:1293) INFO: Namespace(config='conf/train_vits.yaml', print_config=False, log_level='INFO', drop_last_iter=False, dry_run=False, iterator_type='sequence', valid_iterator_type=None, output_dir='exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1', ngpu=0, seed=67823, num_workers=4, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=True, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=False, collect_stats=True, write_collected_feats=False, max_epoch=1000, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[['train', 'total_count', 'max']], keep_nbest_models=10, nbest_averaging_interval=0, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=50, use_matplotlib=True, use_tensorboard=True, create_graph_in_tensorboard=False, use_wandb=True, wandb_project='GROTTS', wandb_id=None, wandb_entity=None, wandb_name='VITS_lr_3.0e-4', wandb_model_log_interval=-1, detect_anomaly=False, use_lora=False, save_lora_only=True, lora_conf={}, pretrain_path=None, init_param=['downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv'], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=1000, batch_size=40, valid_batch_size=None, batch_bins=10000000, valid_batch_bins=None, train_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.1.scp'], valid_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.1.scp'], batch_type='numel', valid_batch_type=None, fold_length=[], sort_in_batch='descending', shuffle_within_batch=False, sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, chunk_excluded_key_prefixes=[], chunk_default_fs=None, train_data_path_and_name_and_type=[('dump/raw/train_nodev/text', 'text', 'text'), ('dump/raw/train_nodev/wav.scp', 'speech', 'sound'), ('dump/raw/train_nodev/utt2sid', 'sids', 'text_int')], valid_data_path_and_name_and_type=[('dump/raw/train_dev/text', 'text', 'text'), ('dump/raw/train_dev/wav.scp', 'speech', 'sound'), ('dump/raw/train_dev/utt2sid', 'sids', 'text_int')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, allow_multi_rates=False, valid_max_cache_size=None, exclude_weight_decay=False, exclude_weight_decay_conf={}, optim='adamw', optim_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler='exponentiallr', scheduler_conf={'gamma': 0.999875}, optim2='adamw', optim2_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, generator_first=False, token_list=['<blank>', '<unk>', '<space>', 'e', 'n', 'a', 'o', 't', 'i', 'r', 'd', 's', 'k', 'l', 'm', 'u', 'g', 'h', 'w', 'v', '.', 'z', 'b', 'p', ',', 'j', 'c', 'f', '‘', '’', ':', '?', 'ö', "'", '!', '-', ';', 'ò', 'è', 'ì', 'é', 'y', 'ë', 'x', 'q', '<sos/eos>'], odim=None, model_conf={}, use_preprocessor=True, token_type='char', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, feats_extract='fbank', feats_extract_conf={'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'fs': 22050, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, normalize=None, normalize_conf={}, tts='vits', tts_conf={'generator_type': 'vits_generator', 'generator_params': {'hidden_channels': 192, 'spks': 4, 'global_channels': 256, 'segment_size': 32, 'text_encoder_attention_heads': 2, 'text_encoder_ffn_expand': 4, 'text_encoder_blocks': 6, 'text_encoder_positionwise_layer_type': 'conv1d', 'text_encoder_positionwise_conv_kernel_size': 3, 'text_encoder_positional_encoding_layer_type': 'rel_pos', 'text_encoder_self_attention_layer_type': 'rel_selfattn', 'text_encoder_activation_type': 'swish', 'text_encoder_normalize_before': True, 'text_encoder_dropout_rate': 0.1, 'text_encoder_positional_dropout_rate': 0.0, 'text_encoder_attention_dropout_rate': 0.1, 'use_macaron_style_in_text_encoder': True, 'use_conformer_conv_in_text_encoder': False, 'text_encoder_conformer_kernel_size': -1, 'decoder_kernel_size': 7, 'decoder_channels': 512, 'decoder_upsample_scales': [8, 8, 2, 2], 'decoder_upsample_kernel_sizes': [16, 16, 4, 4], 'decoder_resblock_kernel_sizes': [3, 7, 11], 'decoder_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'use_weight_norm_in_decoder': True, 'posterior_encoder_kernel_size': 5, 'posterior_encoder_layers': 16, 'posterior_encoder_stacks': 1, 'posterior_encoder_base_dilation': 1, 'posterior_encoder_dropout_rate': 0.0, 'use_weight_norm_in_posterior_encoder': True, 'flow_flows': 4, 'flow_kernel_size': 5, 'flow_base_dilation': 1, 'flow_layers': 4, 'flow_dropout_rate': 0.0, 'use_weight_norm_in_flow': True, 'use_only_mean_in_flow': True, 'stochastic_duration_predictor_kernel_size': 3, 'stochastic_duration_predictor_dropout_rate': 0.5, 'stochastic_duration_predictor_flows': 4, 'stochastic_duration_predictor_dds_conv_layers': 3, 'vocabs': 46, 'aux_channels': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': False, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_dur': 1.0, 'lambda_kl': 1.0, 'sampling_rate': 22050, 'cache_generator_outputs': True}, pitch_extract=None, pitch_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, energy_extract=None, energy_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None}, energy_normalize=None, energy_normalize_conf={}, required=['output_dir', 'token_list'], version='202310', distributed=False)
1151
- # Accounting: time=16 threads=1
1152
- # Ended (code 0) at Fri Dec 1 15:58:50 UTC 2023, elapsed time 16 seconds
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/config.yaml DELETED
@@ -1,383 +0,0 @@
1
- config: conf/train_vits.yaml
2
- print_config: false
3
- log_level: INFO
4
- drop_last_iter: false
5
- dry_run: false
6
- iterator_type: sequence
7
- valid_iterator_type: null
8
- output_dir: exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1
9
- ngpu: 0
10
- seed: 67823
11
- num_workers: 4
12
- num_att_plot: 3
13
- dist_backend: nccl
14
- dist_init_method: env://
15
- dist_world_size: null
16
- dist_rank: null
17
- local_rank: null
18
- dist_master_addr: null
19
- dist_master_port: null
20
- dist_launcher: null
21
- multiprocessing_distributed: false
22
- unused_parameters: true
23
- sharded_ddp: false
24
- cudnn_enabled: true
25
- cudnn_benchmark: false
26
- cudnn_deterministic: false
27
- collect_stats: true
28
- write_collected_feats: false
29
- max_epoch: 1000
30
- patience: null
31
- val_scheduler_criterion:
32
- - valid
33
- - loss
34
- early_stopping_criterion:
35
- - valid
36
- - loss
37
- - min
38
- best_model_criterion:
39
- - - train
40
- - total_count
41
- - max
42
- keep_nbest_models: 10
43
- nbest_averaging_interval: 0
44
- grad_clip: -1
45
- grad_clip_type: 2.0
46
- grad_noise: false
47
- accum_grad: 1
48
- no_forward_run: false
49
- resume: false
50
- train_dtype: float32
51
- use_amp: false
52
- log_interval: 50
53
- use_matplotlib: true
54
- use_tensorboard: true
55
- create_graph_in_tensorboard: false
56
- use_wandb: true
57
- wandb_project: GROTTS
58
- wandb_id: null
59
- wandb_entity: null
60
- wandb_name: VITS_lr_3.0e-4
61
- wandb_model_log_interval: -1
62
- detect_anomaly: false
63
- use_lora: false
64
- save_lora_only: true
65
- lora_conf: {}
66
- pretrain_path: null
67
- init_param:
68
- - downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv
69
- ignore_init_mismatch: false
70
- freeze_param: []
71
- num_iters_per_epoch: 1000
72
- batch_size: 40
73
- valid_batch_size: null
74
- batch_bins: 10000000
75
- valid_batch_bins: null
76
- train_shape_file:
77
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.1.scp
78
- valid_shape_file:
79
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.1.scp
80
- batch_type: numel
81
- valid_batch_type: null
82
- fold_length: []
83
- sort_in_batch: descending
84
- shuffle_within_batch: false
85
- sort_batch: descending
86
- multiple_iterator: false
87
- chunk_length: 500
88
- chunk_shift_ratio: 0.5
89
- num_cache_chunks: 1024
90
- chunk_excluded_key_prefixes: []
91
- chunk_default_fs: null
92
- train_data_path_and_name_and_type:
93
- - - dump/raw/train_nodev/text
94
- - text
95
- - text
96
- - - dump/raw/train_nodev/wav.scp
97
- - speech
98
- - sound
99
- - - dump/raw/train_nodev/utt2sid
100
- - sids
101
- - text_int
102
- valid_data_path_and_name_and_type:
103
- - - dump/raw/train_dev/text
104
- - text
105
- - text
106
- - - dump/raw/train_dev/wav.scp
107
- - speech
108
- - sound
109
- - - dump/raw/train_dev/utt2sid
110
- - sids
111
- - text_int
112
- allow_variable_data_keys: false
113
- max_cache_size: 0.0
114
- max_cache_fd: 32
115
- allow_multi_rates: false
116
- valid_max_cache_size: null
117
- exclude_weight_decay: false
118
- exclude_weight_decay_conf: {}
119
- optim: adamw
120
- optim_conf:
121
- lr: 0.0003
122
- betas:
123
- - 0.8
124
- - 0.99
125
- eps: 1.0e-09
126
- weight_decay: 0.0
127
- scheduler: exponentiallr
128
- scheduler_conf:
129
- gamma: 0.999875
130
- optim2: adamw
131
- optim2_conf:
132
- lr: 0.0003
133
- betas:
134
- - 0.8
135
- - 0.99
136
- eps: 1.0e-09
137
- weight_decay: 0.0
138
- scheduler2: exponentiallr
139
- scheduler2_conf:
140
- gamma: 0.999875
141
- generator_first: false
142
- token_list:
143
- - <blank>
144
- - <unk>
145
- - <space>
146
- - e
147
- - n
148
- - a
149
- - o
150
- - t
151
- - i
152
- - r
153
- - d
154
- - s
155
- - k
156
- - l
157
- - m
158
- - u
159
- - g
160
- - h
161
- - w
162
- - v
163
- - .
164
- - z
165
- - b
166
- - p
167
- - ','
168
- - j
169
- - c
170
- - f
171
- - ‘
172
- - ’
173
- - ':'
174
- - '?'
175
- - ö
176
- - ''''
177
- - '!'
178
- - '-'
179
- - ;
180
- - ò
181
- - è
182
- - ì
183
- - é
184
- - y
185
- - ë
186
- - x
187
- - q
188
- - <sos/eos>
189
- odim: null
190
- model_conf: {}
191
- use_preprocessor: true
192
- token_type: char
193
- bpemodel: null
194
- non_linguistic_symbols: null
195
- cleaner: null
196
- g2p: null
197
- feats_extract: fbank
198
- feats_extract_conf:
199
- n_fft: 1024
200
- hop_length: 256
201
- win_length: null
202
- fs: 22050
203
- fmin: 80
204
- fmax: 7600
205
- n_mels: 80
206
- normalize: null
207
- normalize_conf: {}
208
- tts: vits
209
- tts_conf:
210
- generator_type: vits_generator
211
- generator_params:
212
- hidden_channels: 192
213
- spks: 4
214
- global_channels: 256
215
- segment_size: 32
216
- text_encoder_attention_heads: 2
217
- text_encoder_ffn_expand: 4
218
- text_encoder_blocks: 6
219
- text_encoder_positionwise_layer_type: conv1d
220
- text_encoder_positionwise_conv_kernel_size: 3
221
- text_encoder_positional_encoding_layer_type: rel_pos
222
- text_encoder_self_attention_layer_type: rel_selfattn
223
- text_encoder_activation_type: swish
224
- text_encoder_normalize_before: true
225
- text_encoder_dropout_rate: 0.1
226
- text_encoder_positional_dropout_rate: 0.0
227
- text_encoder_attention_dropout_rate: 0.1
228
- use_macaron_style_in_text_encoder: true
229
- use_conformer_conv_in_text_encoder: false
230
- text_encoder_conformer_kernel_size: -1
231
- decoder_kernel_size: 7
232
- decoder_channels: 512
233
- decoder_upsample_scales:
234
- - 8
235
- - 8
236
- - 2
237
- - 2
238
- decoder_upsample_kernel_sizes:
239
- - 16
240
- - 16
241
- - 4
242
- - 4
243
- decoder_resblock_kernel_sizes:
244
- - 3
245
- - 7
246
- - 11
247
- decoder_resblock_dilations:
248
- - - 1
249
- - 3
250
- - 5
251
- - - 1
252
- - 3
253
- - 5
254
- - - 1
255
- - 3
256
- - 5
257
- use_weight_norm_in_decoder: true
258
- posterior_encoder_kernel_size: 5
259
- posterior_encoder_layers: 16
260
- posterior_encoder_stacks: 1
261
- posterior_encoder_base_dilation: 1
262
- posterior_encoder_dropout_rate: 0.0
263
- use_weight_norm_in_posterior_encoder: true
264
- flow_flows: 4
265
- flow_kernel_size: 5
266
- flow_base_dilation: 1
267
- flow_layers: 4
268
- flow_dropout_rate: 0.0
269
- use_weight_norm_in_flow: true
270
- use_only_mean_in_flow: true
271
- stochastic_duration_predictor_kernel_size: 3
272
- stochastic_duration_predictor_dropout_rate: 0.5
273
- stochastic_duration_predictor_flows: 4
274
- stochastic_duration_predictor_dds_conv_layers: 3
275
- vocabs: 46
276
- aux_channels: 80
277
- discriminator_type: hifigan_multi_scale_multi_period_discriminator
278
- discriminator_params:
279
- scales: 1
280
- scale_downsample_pooling: AvgPool1d
281
- scale_downsample_pooling_params:
282
- kernel_size: 4
283
- stride: 2
284
- padding: 2
285
- scale_discriminator_params:
286
- in_channels: 1
287
- out_channels: 1
288
- kernel_sizes:
289
- - 15
290
- - 41
291
- - 5
292
- - 3
293
- channels: 128
294
- max_downsample_channels: 1024
295
- max_groups: 16
296
- bias: true
297
- downsample_scales:
298
- - 2
299
- - 2
300
- - 4
301
- - 4
302
- - 1
303
- nonlinear_activation: LeakyReLU
304
- nonlinear_activation_params:
305
- negative_slope: 0.1
306
- use_weight_norm: false
307
- use_spectral_norm: false
308
- follow_official_norm: false
309
- periods:
310
- - 2
311
- - 3
312
- - 5
313
- - 7
314
- - 11
315
- period_discriminator_params:
316
- in_channels: 1
317
- out_channels: 1
318
- kernel_sizes:
319
- - 5
320
- - 3
321
- channels: 32
322
- downsample_scales:
323
- - 3
324
- - 3
325
- - 3
326
- - 3
327
- - 1
328
- max_downsample_channels: 1024
329
- bias: true
330
- nonlinear_activation: LeakyReLU
331
- nonlinear_activation_params:
332
- negative_slope: 0.1
333
- use_weight_norm: true
334
- use_spectral_norm: false
335
- generator_adv_loss_params:
336
- average_by_discriminators: false
337
- loss_type: mse
338
- discriminator_adv_loss_params:
339
- average_by_discriminators: false
340
- loss_type: mse
341
- feat_match_loss_params:
342
- average_by_discriminators: false
343
- average_by_layers: false
344
- include_final_outputs: true
345
- mel_loss_params:
346
- fs: 22050
347
- n_fft: 1024
348
- hop_length: 256
349
- win_length: null
350
- window: hann
351
- n_mels: 80
352
- fmin: 0
353
- fmax: null
354
- log_base: null
355
- lambda_adv: 1.0
356
- lambda_mel: 45.0
357
- lambda_feat_match: 2.0
358
- lambda_dur: 1.0
359
- lambda_kl: 1.0
360
- sampling_rate: 22050
361
- cache_generator_outputs: true
362
- pitch_extract: null
363
- pitch_extract_conf:
364
- fs: 22050
365
- n_fft: 1024
366
- hop_length: 256
367
- f0max: 400
368
- f0min: 80
369
- pitch_normalize: null
370
- pitch_normalize_conf: {}
371
- energy_extract: null
372
- energy_extract_conf:
373
- fs: 22050
374
- n_fft: 1024
375
- hop_length: 256
376
- win_length: null
377
- energy_normalize: null
378
- energy_normalize_conf: {}
379
- required:
380
- - output_dir
381
- - token_list
382
- version: '202310'
383
- distributed: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/batch_keys DELETED
@@ -1,3 +0,0 @@
1
- text
2
- speech
3
- sids
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/feats_lengths_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddfdb5fa7d569f4cfd147a3b2c063b468bc622eef7ac57458ca7e4c3afded949
3
- size 778
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/feats_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4410bc31eace8e651b586b137f0b39fafa8920aaf121ca372393331ef4abeadf
3
- size 1402
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/sids_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-0002 1
2
- Hoogelaandsters-0006 1
3
- Hoogelaandsters-0010 1
4
- Hoogelaandsters-0014 1
5
- Hoogelaandsters-0018 1
6
- Hoogelaandsters-0022 1
7
- Hoogelaandsters-0026 1
8
- Hoogelaandsters-0030 1
9
- Hoogelaandsters-0034 1
10
- Hoogelaandsters-0038 1
11
- Hoogelaandsters-0042 1
12
- Hoogelaandsters-0046 1
13
- Hoogelaandsters-0050 1
14
- Hoogelaandsters-0054 1
15
- Hoogelaandsters-0058 1
16
- Hoogelaandsters-0062 1
17
- Hoogelaandsters-0066 1
18
- Hoogelaandsters-0071 1
19
- Hoogelaandsters-0075 1
20
- Hoogelaandsters-0079 1
21
- Hoogelaandsters-0083 1
22
- Hoogelaandsters-0087 1
23
- Hoogelaandsters-0091 1
24
- Hoogelaandsters-0095 1
25
- Hoogelaandsters-0099 1
26
- Hoogelaandsters-0103 1
27
- Hoogelaandsters-0107 1
28
- Hoogelaandsters-0111 1
29
- Hoogelaandsters-0115 1
30
- Hoogelaandsters-0119 1
31
- Hoogelaandsters-0123 1
32
- Hoogelaandsters-0127 1
33
- Hoogelaandsters-0131 1
34
- Hoogelaandsters-0136 1
35
- Hoogelaandsters-0140 1
36
- Hoogelaandsters-0144 1
37
- Hoogelaandsters-0148 1
38
- Hoogelaandsters-0152 1
39
- Hoogelaandsters-0156 1
40
- Hoogelaandsters-0160 1
41
- Hoogelaandsters-0003 1
42
- Hoogelaandsters-0007 1
43
- Hoogelaandsters-0011 1
44
- Hoogelaandsters-0015 1
45
- Hoogelaandsters-0019 1
46
- Hoogelaandsters-0023 1
47
- Hoogelaandsters-0027 1
48
- Hoogelaandsters-0031 1
49
- Hoogelaandsters-0035 1
50
- Hoogelaandsters-0039 1
51
- Hoogelaandsters-0043 1
52
- Hoogelaandsters-0047 1
53
- Hoogelaandsters-0051 1
54
- Hoogelaandsters-0055 1
55
- Hoogelaandsters-0059 1
56
- Hoogelaandsters-0063 1
57
- Hoogelaandsters-0067 1
58
- Hoogelaandsters-0072 1
59
- Hoogelaandsters-0076 1
60
- Hoogelaandsters-0080 1
61
- Hoogelaandsters-0084 1
62
- Hoogelaandsters-0088 1
63
- Hoogelaandsters-0092 1
64
- Hoogelaandsters-0096 1
65
- Hoogelaandsters-0100 1
66
- Hoogelaandsters-0104 1
67
- Hoogelaandsters-0108 1
68
- Hoogelaandsters-0112 1
69
- Hoogelaandsters-0116 1
70
- Hoogelaandsters-0120 1
71
- Hoogelaandsters-0124 1
72
- Hoogelaandsters-0128 1
73
- Hoogelaandsters-0132 1
74
- Hoogelaandsters-0137 1
75
- Hoogelaandsters-0141 1
76
- Hoogelaandsters-0145 1
77
- Hoogelaandsters-0149 1
78
- Hoogelaandsters-0153 1
79
- Hoogelaandsters-0157 1
80
- Hoogelaandsters-0161 1
81
- Hoogelaandsters-0004 1
82
- Hoogelaandsters-0008 1
83
- Hoogelaandsters-0012 1
84
- Hoogelaandsters-0016 1
85
- Hoogelaandsters-0020 1
86
- Hoogelaandsters-0024 1
87
- Hoogelaandsters-0028 1
88
- Hoogelaandsters-0032 1
89
- Hoogelaandsters-0036 1
90
- Hoogelaandsters-0040 1
91
- Hoogelaandsters-0044 1
92
- Hoogelaandsters-0048 1
93
- Hoogelaandsters-0052 1
94
- Hoogelaandsters-0056 1
95
- Hoogelaandsters-0060 1
96
- Hoogelaandsters-0064 1
97
- Hoogelaandsters-0069 1
98
- Hoogelaandsters-0073 1
99
- Hoogelaandsters-0077 1
100
- Hoogelaandsters-0081 1
101
- Hoogelaandsters-0085 1
102
- Hoogelaandsters-0089 1
103
- Hoogelaandsters-0093 1
104
- Hoogelaandsters-0097 1
105
- Hoogelaandsters-0101 1
106
- Hoogelaandsters-0105 1
107
- Hoogelaandsters-0109 1
108
- Hoogelaandsters-0113 1
109
- Hoogelaandsters-0117 1
110
- Hoogelaandsters-0121 1
111
- Hoogelaandsters-0125 1
112
- Hoogelaandsters-0129 1
113
- Hoogelaandsters-0133 1
114
- Hoogelaandsters-0138 1
115
- Hoogelaandsters-0142 1
116
- Hoogelaandsters-0146 1
117
- Hoogelaandsters-0150 1
118
- Hoogelaandsters-0154 1
119
- Hoogelaandsters-0158 1
120
- Hoogelaandsters-0162 1
121
- Hoogelaandsters-0005 1
122
- Hoogelaandsters-0009 1
123
- Hoogelaandsters-0013 1
124
- Hoogelaandsters-0017 1
125
- Hoogelaandsters-0021 1
126
- Hoogelaandsters-0025 1
127
- Hoogelaandsters-0029 1
128
- Hoogelaandsters-0033 1
129
- Hoogelaandsters-0037 1
130
- Hoogelaandsters-0041 1
131
- Hoogelaandsters-0045 1
132
- Hoogelaandsters-0049 1
133
- Hoogelaandsters-0053 1
134
- Hoogelaandsters-0057 1
135
- Hoogelaandsters-0061 1
136
- Hoogelaandsters-0065 1
137
- Hoogelaandsters-0070 1
138
- Hoogelaandsters-0074 1
139
- Hoogelaandsters-0078 1
140
- Hoogelaandsters-0082 1
141
- Hoogelaandsters-0086 1
142
- Hoogelaandsters-0090 1
143
- Hoogelaandsters-0094 1
144
- Hoogelaandsters-0098 1
145
- Hoogelaandsters-0102 1
146
- Hoogelaandsters-0106 1
147
- Hoogelaandsters-0110 1
148
- Hoogelaandsters-0114 1
149
- Hoogelaandsters-0118 1
150
- Hoogelaandsters-0122 1
151
- Hoogelaandsters-0126 1
152
- Hoogelaandsters-0130 1
153
- Hoogelaandsters-0134 1
154
- Hoogelaandsters-0139 1
155
- Hoogelaandsters-0143 1
156
- Hoogelaandsters-0147 1
157
- Hoogelaandsters-0151 1
158
- Hoogelaandsters-0155 1
159
- Hoogelaandsters-0159 1
160
- Hoogelaandsters-0163 1
161
- Hoogelaandsters-0164 1
162
- Hoogelaandsters-0168 1
163
- Hoogelaandsters-0172 1
164
- Hoogelaandsters-0176 1
165
- Hoogelaandsters-0180 1
166
- Hoogelaandsters-0184 1
167
- Hoogelaandsters-0188 1
168
- Hoogelaandsters-0192 1
169
- Hoogelaandsters-0196 1
170
- Hoogelaandsters-0200 1
171
- Hoogelaandsters-0205 1
172
- Hoogelaandsters-0209 1
173
- Hoogelaandsters-0213 1
174
- Hoogelaandsters-0217 1
175
- Hoogelaandsters-0221 1
176
- Hoogelaandsters-0225 1
177
- Hoogelaandsters-0229 1
178
- Hoogelaandsters-0233 1
179
- Hoogelaandsters-0237 1
180
- Hoogelaandsters-0241 1
181
- Hoogelaandsters-0245 1
182
- Hoogelaandsters-0249 1
183
- Hoogelaandsters-0253 1
184
- Hoogelaandsters-0165 1
185
- Hoogelaandsters-0169 1
186
- Hoogelaandsters-0173 1
187
- Hoogelaandsters-0177 1
188
- Hoogelaandsters-0181 1
189
- Hoogelaandsters-0185 1
190
- Hoogelaandsters-0189 1
191
- Hoogelaandsters-0193 1
192
- Hoogelaandsters-0197 1
193
- Hoogelaandsters-0201 1
194
- Hoogelaandsters-0206 1
195
- Hoogelaandsters-0210 1
196
- Hoogelaandsters-0214 1
197
- Hoogelaandsters-0218 1
198
- Hoogelaandsters-0222 1
199
- Hoogelaandsters-0226 1
200
- Hoogelaandsters-0230 1
201
- Hoogelaandsters-0234 1
202
- Hoogelaandsters-0238 1
203
- Hoogelaandsters-0242 1
204
- Hoogelaandsters-0246 1
205
- Hoogelaandsters-0250 1
206
- Hoogelaandsters-0166 1
207
- Hoogelaandsters-0170 1
208
- Hoogelaandsters-0174 1
209
- Hoogelaandsters-0178 1
210
- Hoogelaandsters-0182 1
211
- Hoogelaandsters-0186 1
212
- Hoogelaandsters-0190 1
213
- Hoogelaandsters-0194 1
214
- Hoogelaandsters-0198 1
215
- Hoogelaandsters-0203 1
216
- Hoogelaandsters-0207 1
217
- Hoogelaandsters-0211 1
218
- Hoogelaandsters-0215 1
219
- Hoogelaandsters-0219 1
220
- Hoogelaandsters-0223 1
221
- Hoogelaandsters-0227 1
222
- Hoogelaandsters-0231 1
223
- Hoogelaandsters-0235 1
224
- Hoogelaandsters-0239 1
225
- Hoogelaandsters-0243 1
226
- Hoogelaandsters-0247 1
227
- Hoogelaandsters-0251 1
228
- Hoogelaandsters-0167 1
229
- Hoogelaandsters-0171 1
230
- Hoogelaandsters-0175 1
231
- Hoogelaandsters-0179 1
232
- Hoogelaandsters-0183 1
233
- Hoogelaandsters-0187 1
234
- Hoogelaandsters-0191 1
235
- Hoogelaandsters-0195 1
236
- Hoogelaandsters-0199 1
237
- Hoogelaandsters-0204 1
238
- Hoogelaandsters-0208 1
239
- Hoogelaandsters-0212 1
240
- Hoogelaandsters-0216 1
241
- Hoogelaandsters-0220 1
242
- Hoogelaandsters-0224 1
243
- Hoogelaandsters-0228 1
244
- Hoogelaandsters-0232 1
245
- Hoogelaandsters-0236 1
246
- Hoogelaandsters-0240 1
247
- Hoogelaandsters-0244 1
248
- Hoogelaandsters-0248 1
249
- Hoogelaandsters-0252 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/speech_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-0002 148618
2
- Hoogelaandsters-0006 71366
3
- Hoogelaandsters-0010 40465
4
- Hoogelaandsters-0014 141996
5
- Hoogelaandsters-0018 123603
6
- Hoogelaandsters-0022 80930
7
- Hoogelaandsters-0026 71366
8
- Hoogelaandsters-0030 82402
9
- Hoogelaandsters-0034 85345
10
- Hoogelaandsters-0038 76516
11
- Hoogelaandsters-0042 179519
12
- Hoogelaandsters-0046 375959
13
- Hoogelaandsters-0050 44880
14
- Hoogelaandsters-0054 61802
15
- Hoogelaandsters-0058 75780
16
- Hoogelaandsters-0062 153768
17
- Hoogelaandsters-0066 83138
18
- Hoogelaandsters-0071 129489
19
- Hoogelaandsters-0075 130960
20
- Hoogelaandsters-0079 153768
21
- Hoogelaandsters-0083 135375
22
- Hoogelaandsters-0087 83874
23
- Hoogelaandsters-0091 87552
24
- Hoogelaandsters-0095 80930
25
- Hoogelaandsters-0099 90495
26
- Hoogelaandsters-0103 133168
27
- Hoogelaandsters-0107 38258
28
- Hoogelaandsters-0111 48558
29
- Hoogelaandsters-0115 100060
30
- Hoogelaandsters-0119 49294
31
- Hoogelaandsters-0123 66216
32
- Hoogelaandsters-0127 186141
33
- Hoogelaandsters-0131 37522
34
- Hoogelaandsters-0136 139054
35
- Hoogelaandsters-0140 40465
36
- Hoogelaandsters-0144 64744
37
- Hoogelaandsters-0148 41936
38
- Hoogelaandsters-0152 99324
39
- Hoogelaandsters-0156 70630
40
- Hoogelaandsters-0160 92702
41
- Hoogelaandsters-0003 124339
42
- Hoogelaandsters-0007 58858
43
- Hoogelaandsters-0011 187612
44
- Hoogelaandsters-0015 52972
45
- Hoogelaandsters-0019 56652
46
- Hoogelaandsters-0023 36050
47
- Hoogelaandsters-0027 90495
48
- Hoogelaandsters-0031 25752
49
- Hoogelaandsters-0035 91966
50
- Hoogelaandsters-0039 144204
51
- Hoogelaandsters-0043 75780
52
- Hoogelaandsters-0047 168483
53
- Hoogelaandsters-0051 153033
54
- Hoogelaandsters-0055 57387
55
- Hoogelaandsters-0059 104474
56
- Hoogelaandsters-0063 113303
57
- Hoogelaandsters-0067 47822
58
- Hoogelaandsters-0072 122868
59
- Hoogelaandsters-0076 54444
60
- Hoogelaandsters-0080 75044
61
- Hoogelaandsters-0084 103002
62
- Hoogelaandsters-0088 224399
63
- Hoogelaandsters-0092 85345
64
- Hoogelaandsters-0096 86816
65
- Hoogelaandsters-0100 122868
66
- Hoogelaandsters-0104 82402
67
- Hoogelaandsters-0108 82402
68
- Hoogelaandsters-0112 77252
69
- Hoogelaandsters-0116 139790
70
- Hoogelaandsters-0120 111096
71
- Hoogelaandsters-0124 63272
72
- Hoogelaandsters-0128 86816
73
- Hoogelaandsters-0132 42672
74
- Hoogelaandsters-0137 101531
75
- Hoogelaandsters-0141 128754
76
- Hoogelaandsters-0145 96381
77
- Hoogelaandsters-0149 151561
78
- Hoogelaandsters-0153 100060
79
- Hoogelaandsters-0157 114774
80
- Hoogelaandsters-0161 106682
81
- Hoogelaandsters-0004 96381
82
- Hoogelaandsters-0008 49294
83
- Hoogelaandsters-0012 75780
84
- Hoogelaandsters-0016 104474
85
- Hoogelaandsters-0020 55916
86
- Hoogelaandsters-0024 77988
87
- Hoogelaandsters-0028 128018
88
- Hoogelaandsters-0032 33844
89
- Hoogelaandsters-0036 94910
90
- Hoogelaandsters-0040 58858
91
- Hoogelaandsters-0044 72102
92
- Hoogelaandsters-0048 134639
93
- Hoogelaandsters-0052 95646
94
- Hoogelaandsters-0056 37522
95
- Hoogelaandsters-0060 55180
96
- Hoogelaandsters-0064 57387
97
- Hoogelaandsters-0069 126546
98
- Hoogelaandsters-0073 210420
99
- Hoogelaandsters-0077 86816
100
- Hoogelaandsters-0081 65480
101
- Hoogelaandsters-0085 122868
102
- Hoogelaandsters-0089 84610
103
- Hoogelaandsters-0093 148618
104
- Hoogelaandsters-0097 75044
105
- Hoogelaandsters-0101 44144
106
- Hoogelaandsters-0105 43408
107
- Hoogelaandsters-0109 267071
108
- Hoogelaandsters-0113 141996
109
- Hoogelaandsters-0117 44144
110
- Hoogelaandsters-0121 94910
111
- Hoogelaandsters-0125 63272
112
- Hoogelaandsters-0129 38994
113
- Hoogelaandsters-0133 61066
114
- Hoogelaandsters-0138 58122
115
- Hoogelaandsters-0142 115510
116
- Hoogelaandsters-0146 63272
117
- Hoogelaandsters-0150 52972
118
- Hoogelaandsters-0154 130225
119
- Hoogelaandsters-0158 119189
120
- Hoogelaandsters-0162 30164
121
- Hoogelaandsters-0005 44144
122
- Hoogelaandsters-0009 189819
123
- Hoogelaandsters-0013 122868
124
- Hoogelaandsters-0017 72838
125
- Hoogelaandsters-0021 86080
126
- Hoogelaandsters-0025 217042
127
- Hoogelaandsters-0029 63272
128
- Hoogelaandsters-0033 114774
129
- Hoogelaandsters-0037 138318
130
- Hoogelaandsters-0041 148618
131
- Hoogelaandsters-0045 126546
132
- Hoogelaandsters-0049 57387
133
- Hoogelaandsters-0053 66952
134
- Hoogelaandsters-0057 33844
135
- Hoogelaandsters-0061 47822
136
- Hoogelaandsters-0065 66216
137
- Hoogelaandsters-0070 186877
138
- Hoogelaandsters-0074 66952
139
- Hoogelaandsters-0078 83874
140
- Hoogelaandsters-0082 90495
141
- Hoogelaandsters-0086 167012
142
- Hoogelaandsters-0090 43408
143
- Hoogelaandsters-0094 44880
144
- Hoogelaandsters-0098 105946
145
- Hoogelaandsters-0102 143468
146
- Hoogelaandsters-0106 111096
147
- Hoogelaandsters-0110 97852
148
- Hoogelaandsters-0114 108152
149
- Hoogelaandsters-0118 100796
150
- Hoogelaandsters-0122 77252
151
- Hoogelaandsters-0126 62537
152
- Hoogelaandsters-0130 100796
153
- Hoogelaandsters-0134 67688
154
- Hoogelaandsters-0139 111096
155
- Hoogelaandsters-0143 97116
156
- Hoogelaandsters-0147 54444
157
- Hoogelaandsters-0151 58858
158
- Hoogelaandsters-0155 48558
159
- Hoogelaandsters-0159 102267
160
- Hoogelaandsters-0163 48558
161
- Hoogelaandsters-0164 41936
162
- Hoogelaandsters-0168 121396
163
- Hoogelaandsters-0172 87552
164
- Hoogelaandsters-0176 57387
165
- Hoogelaandsters-0180 102267
166
- Hoogelaandsters-0184 46350
167
- Hoogelaandsters-0188 75044
168
- Hoogelaandsters-0192 38994
169
- Hoogelaandsters-0196 77988
170
- Hoogelaandsters-0200 114038
171
- Hoogelaandsters-0205 51501
172
- Hoogelaandsters-0209 95646
173
- Hoogelaandsters-0213 176576
174
- Hoogelaandsters-0217 44880
175
- Hoogelaandsters-0221 64744
176
- Hoogelaandsters-0225 94174
177
- Hoogelaandsters-0229 102267
178
- Hoogelaandsters-0233 209684
179
- Hoogelaandsters-0237 186877
180
- Hoogelaandsters-0241 156711
181
- Hoogelaandsters-0245 252357
182
- Hoogelaandsters-0249 59594
183
- Hoogelaandsters-0253 60330
184
- Hoogelaandsters-0165 86080
185
- Hoogelaandsters-0169 62537
186
- Hoogelaandsters-0173 64008
187
- Hoogelaandsters-0177 36788
188
- Hoogelaandsters-0181 70630
189
- Hoogelaandsters-0185 85345
190
- Hoogelaandsters-0189 42672
191
- Hoogelaandsters-0193 58122
192
- Hoogelaandsters-0197 91966
193
- Hoogelaandsters-0201 116982
194
- Hoogelaandsters-0206 28693
195
- Hoogelaandsters-0210 86816
196
- Hoogelaandsters-0214 119189
197
- Hoogelaandsters-0218 41936
198
- Hoogelaandsters-0222 88288
199
- Hoogelaandsters-0226 112567
200
- Hoogelaandsters-0230 75044
201
- Hoogelaandsters-0234 106682
202
- Hoogelaandsters-0238 111832
203
- Hoogelaandsters-0242 111096
204
- Hoogelaandsters-0246 94910
205
- Hoogelaandsters-0250 75780
206
- Hoogelaandsters-0166 86080
207
- Hoogelaandsters-0170 38994
208
- Hoogelaandsters-0174 33844
209
- Hoogelaandsters-0178 42672
210
- Hoogelaandsters-0182 33844
211
- Hoogelaandsters-0186 29428
212
- Hoogelaandsters-0190 69158
213
- Hoogelaandsters-0194 36050
214
- Hoogelaandsters-0198 180991
215
- Hoogelaandsters-0203 68423
216
- Hoogelaandsters-0207 83874
217
- Hoogelaandsters-0211 147147
218
- Hoogelaandsters-0215 54444
219
- Hoogelaandsters-0219 243528
220
- Hoogelaandsters-0223 105946
221
- Hoogelaandsters-0227 64744
222
- Hoogelaandsters-0231 142732
223
- Hoogelaandsters-0235 97852
224
- Hoogelaandsters-0239 73574
225
- Hoogelaandsters-0243 160390
226
- Hoogelaandsters-0247 63272
227
- Hoogelaandsters-0251 55916
228
- Hoogelaandsters-0167 83138
229
- Hoogelaandsters-0171 75044
230
- Hoogelaandsters-0175 147882
231
- Hoogelaandsters-0179 105946
232
- Hoogelaandsters-0183 40465
233
- Hoogelaandsters-0187 54444
234
- Hoogelaandsters-0191 158183
235
- Hoogelaandsters-0195 78724
236
- Hoogelaandsters-0199 64008
237
- Hoogelaandsters-0204 143468
238
- Hoogelaandsters-0208 47822
239
- Hoogelaandsters-0212 45615
240
- Hoogelaandsters-0216 150826
241
- Hoogelaandsters-0220 125810
242
- Hoogelaandsters-0224 114774
243
- Hoogelaandsters-0228 107417
244
- Hoogelaandsters-0232 153768
245
- Hoogelaandsters-0236 139054
246
- Hoogelaandsters-0240 149354
247
- Hoogelaandsters-0244 83874
248
- Hoogelaandsters-0248 69158
249
- Hoogelaandsters-0252 112567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/stats_keys DELETED
@@ -1,2 +0,0 @@
1
- feats
2
- feats_lengths
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/train/text_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-0002 64
2
- Hoogelaandsters-0006 54
3
- Hoogelaandsters-0010 29
4
- Hoogelaandsters-0014 97
5
- Hoogelaandsters-0018 94
6
- Hoogelaandsters-0022 62
7
- Hoogelaandsters-0026 54
8
- Hoogelaandsters-0030 51
9
- Hoogelaandsters-0034 52
10
- Hoogelaandsters-0038 50
11
- Hoogelaandsters-0042 114
12
- Hoogelaandsters-0046 241
13
- Hoogelaandsters-0050 32
14
- Hoogelaandsters-0054 42
15
- Hoogelaandsters-0058 48
16
- Hoogelaandsters-0062 118
17
- Hoogelaandsters-0066 72
18
- Hoogelaandsters-0071 95
19
- Hoogelaandsters-0075 108
20
- Hoogelaandsters-0079 110
21
- Hoogelaandsters-0083 110
22
- Hoogelaandsters-0087 67
23
- Hoogelaandsters-0091 60
24
- Hoogelaandsters-0095 70
25
- Hoogelaandsters-0099 61
26
- Hoogelaandsters-0103 85
27
- Hoogelaandsters-0107 30
28
- Hoogelaandsters-0111 26
29
- Hoogelaandsters-0115 61
30
- Hoogelaandsters-0119 44
31
- Hoogelaandsters-0123 54
32
- Hoogelaandsters-0127 138
33
- Hoogelaandsters-0131 31
34
- Hoogelaandsters-0136 91
35
- Hoogelaandsters-0140 28
36
- Hoogelaandsters-0144 47
37
- Hoogelaandsters-0148 26
38
- Hoogelaandsters-0152 84
39
- Hoogelaandsters-0156 46
40
- Hoogelaandsters-0160 66
41
- Hoogelaandsters-0003 96
42
- Hoogelaandsters-0007 38
43
- Hoogelaandsters-0011 113
44
- Hoogelaandsters-0015 50
45
- Hoogelaandsters-0019 43
46
- Hoogelaandsters-0023 30
47
- Hoogelaandsters-0027 69
48
- Hoogelaandsters-0031 14
49
- Hoogelaandsters-0035 60
50
- Hoogelaandsters-0039 98
51
- Hoogelaandsters-0043 52
52
- Hoogelaandsters-0047 113
53
- Hoogelaandsters-0051 92
54
- Hoogelaandsters-0055 37
55
- Hoogelaandsters-0059 82
56
- Hoogelaandsters-0063 78
57
- Hoogelaandsters-0067 30
58
- Hoogelaandsters-0072 77
59
- Hoogelaandsters-0076 50
60
- Hoogelaandsters-0080 49
61
- Hoogelaandsters-0084 86
62
- Hoogelaandsters-0088 162
63
- Hoogelaandsters-0092 60
64
- Hoogelaandsters-0096 65
65
- Hoogelaandsters-0100 83
66
- Hoogelaandsters-0104 55
67
- Hoogelaandsters-0108 66
68
- Hoogelaandsters-0112 42
69
- Hoogelaandsters-0116 97
70
- Hoogelaandsters-0120 79
71
- Hoogelaandsters-0124 50
72
- Hoogelaandsters-0128 49
73
- Hoogelaandsters-0132 26
74
- Hoogelaandsters-0137 68
75
- Hoogelaandsters-0141 89
76
- Hoogelaandsters-0145 70
77
- Hoogelaandsters-0149 92
78
- Hoogelaandsters-0153 60
79
- Hoogelaandsters-0157 74
80
- Hoogelaandsters-0161 81
81
- Hoogelaandsters-0004 69
82
- Hoogelaandsters-0008 38
83
- Hoogelaandsters-0012 55
84
- Hoogelaandsters-0016 70
85
- Hoogelaandsters-0020 28
86
- Hoogelaandsters-0024 64
87
- Hoogelaandsters-0028 87
88
- Hoogelaandsters-0032 18
89
- Hoogelaandsters-0036 72
90
- Hoogelaandsters-0040 30
91
- Hoogelaandsters-0044 44
92
- Hoogelaandsters-0048 99
93
- Hoogelaandsters-0052 75
94
- Hoogelaandsters-0056 18
95
- Hoogelaandsters-0060 38
96
- Hoogelaandsters-0064 41
97
- Hoogelaandsters-0069 86
98
- Hoogelaandsters-0073 149
99
- Hoogelaandsters-0077 62
100
- Hoogelaandsters-0081 49
101
- Hoogelaandsters-0085 92
102
- Hoogelaandsters-0089 63
103
- Hoogelaandsters-0093 104
104
- Hoogelaandsters-0097 50
105
- Hoogelaandsters-0101 36
106
- Hoogelaandsters-0105 33
107
- Hoogelaandsters-0109 166
108
- Hoogelaandsters-0113 70
109
- Hoogelaandsters-0117 31
110
- Hoogelaandsters-0121 62
111
- Hoogelaandsters-0125 41
112
- Hoogelaandsters-0129 26
113
- Hoogelaandsters-0133 41
114
- Hoogelaandsters-0138 35
115
- Hoogelaandsters-0142 86
116
- Hoogelaandsters-0146 50
117
- Hoogelaandsters-0150 42
118
- Hoogelaandsters-0154 103
119
- Hoogelaandsters-0158 74
120
- Hoogelaandsters-0162 25
121
- Hoogelaandsters-0005 37
122
- Hoogelaandsters-0009 121
123
- Hoogelaandsters-0013 78
124
- Hoogelaandsters-0017 56
125
- Hoogelaandsters-0021 67
126
- Hoogelaandsters-0025 127
127
- Hoogelaandsters-0029 51
128
- Hoogelaandsters-0033 82
129
- Hoogelaandsters-0037 100
130
- Hoogelaandsters-0041 87
131
- Hoogelaandsters-0045 87
132
- Hoogelaandsters-0049 44
133
- Hoogelaandsters-0053 42
134
- Hoogelaandsters-0057 30
135
- Hoogelaandsters-0061 36
136
- Hoogelaandsters-0065 51
137
- Hoogelaandsters-0070 130
138
- Hoogelaandsters-0074 51
139
- Hoogelaandsters-0078 62
140
- Hoogelaandsters-0082 74
141
- Hoogelaandsters-0086 119
142
- Hoogelaandsters-0090 33
143
- Hoogelaandsters-0094 32
144
- Hoogelaandsters-0098 82
145
- Hoogelaandsters-0102 83
146
- Hoogelaandsters-0106 76
147
- Hoogelaandsters-0110 54
148
- Hoogelaandsters-0114 63
149
- Hoogelaandsters-0118 61
150
- Hoogelaandsters-0122 47
151
- Hoogelaandsters-0126 42
152
- Hoogelaandsters-0130 71
153
- Hoogelaandsters-0134 45
154
- Hoogelaandsters-0139 94
155
- Hoogelaandsters-0143 67
156
- Hoogelaandsters-0147 30
157
- Hoogelaandsters-0151 38
158
- Hoogelaandsters-0155 36
159
- Hoogelaandsters-0159 79
160
- Hoogelaandsters-0163 39
161
- Hoogelaandsters-0164 32
162
- Hoogelaandsters-0168 86
163
- Hoogelaandsters-0172 59
164
- Hoogelaandsters-0176 38
165
- Hoogelaandsters-0180 59
166
- Hoogelaandsters-0184 30
167
- Hoogelaandsters-0188 54
168
- Hoogelaandsters-0192 38
169
- Hoogelaandsters-0196 58
170
- Hoogelaandsters-0200 73
171
- Hoogelaandsters-0205 42
172
- Hoogelaandsters-0209 63
173
- Hoogelaandsters-0213 119
174
- Hoogelaandsters-0217 35
175
- Hoogelaandsters-0221 36
176
- Hoogelaandsters-0225 65
177
- Hoogelaandsters-0229 75
178
- Hoogelaandsters-0233 126
179
- Hoogelaandsters-0237 126
180
- Hoogelaandsters-0241 107
181
- Hoogelaandsters-0245 168
182
- Hoogelaandsters-0249 45
183
- Hoogelaandsters-0253 41
184
- Hoogelaandsters-0165 68
185
- Hoogelaandsters-0169 35
186
- Hoogelaandsters-0173 44
187
- Hoogelaandsters-0177 28
188
- Hoogelaandsters-0181 39
189
- Hoogelaandsters-0185 58
190
- Hoogelaandsters-0189 27
191
- Hoogelaandsters-0193 35
192
- Hoogelaandsters-0197 61
193
- Hoogelaandsters-0201 76
194
- Hoogelaandsters-0206 23
195
- Hoogelaandsters-0210 62
196
- Hoogelaandsters-0214 89
197
- Hoogelaandsters-0218 26
198
- Hoogelaandsters-0222 63
199
- Hoogelaandsters-0226 76
200
- Hoogelaandsters-0230 59
201
- Hoogelaandsters-0234 72
202
- Hoogelaandsters-0238 73
203
- Hoogelaandsters-0242 79
204
- Hoogelaandsters-0246 54
205
- Hoogelaandsters-0250 63
206
- Hoogelaandsters-0166 64
207
- Hoogelaandsters-0170 37
208
- Hoogelaandsters-0174 29
209
- Hoogelaandsters-0178 30
210
- Hoogelaandsters-0182 37
211
- Hoogelaandsters-0186 29
212
- Hoogelaandsters-0190 51
213
- Hoogelaandsters-0194 30
214
- Hoogelaandsters-0198 119
215
- Hoogelaandsters-0203 49
216
- Hoogelaandsters-0207 57
217
- Hoogelaandsters-0211 101
218
- Hoogelaandsters-0215 42
219
- Hoogelaandsters-0219 166
220
- Hoogelaandsters-0223 71
221
- Hoogelaandsters-0227 40
222
- Hoogelaandsters-0231 92
223
- Hoogelaandsters-0235 66
224
- Hoogelaandsters-0239 54
225
- Hoogelaandsters-0243 94
226
- Hoogelaandsters-0247 41
227
- Hoogelaandsters-0251 43
228
- Hoogelaandsters-0167 66
229
- Hoogelaandsters-0171 54
230
- Hoogelaandsters-0175 113
231
- Hoogelaandsters-0179 78
232
- Hoogelaandsters-0183 39
233
- Hoogelaandsters-0187 44
234
- Hoogelaandsters-0191 93
235
- Hoogelaandsters-0195 58
236
- Hoogelaandsters-0199 51
237
- Hoogelaandsters-0204 111
238
- Hoogelaandsters-0208 35
239
- Hoogelaandsters-0212 27
240
- Hoogelaandsters-0216 110
241
- Hoogelaandsters-0220 73
242
- Hoogelaandsters-0224 47
243
- Hoogelaandsters-0228 66
244
- Hoogelaandsters-0232 101
245
- Hoogelaandsters-0236 94
246
- Hoogelaandsters-0240 104
247
- Hoogelaandsters-0244 50
248
- Hoogelaandsters-0248 44
249
- Hoogelaandsters-0252 85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/batch_keys DELETED
@@ -1,3 +0,0 @@
1
- text
2
- speech
3
- sids
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/feats_lengths_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:92b775aaa5d2948c527ad29e192999a8388c2e6fd81fad2475ee7da577f3594a
3
- size 778
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/feats_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4abdc1055a5330f17559bc4667c89f68c0ab191e3debc39c5735481cdf9d19ef
3
- size 1402
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/sids_shape DELETED
@@ -1,5 +0,0 @@
1
- Hoogelaandsters-0001 1
2
- Hoogelaandsters-0269 1
3
- Hoogelaandsters-0068 1
4
- Hoogelaandsters-0135 1
5
- Hoogelaandsters-0202 1
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/speech_shape DELETED
@@ -1,5 +0,0 @@
1
- Hoogelaandsters-0001 195705
2
- Hoogelaandsters-0269 119189
3
- Hoogelaandsters-0068 136111
4
- Hoogelaandsters-0135 87552
5
- Hoogelaandsters-0202 109624
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/stats_keys DELETED
@@ -1,2 +0,0 @@
1
- feats
2
- feats_lengths
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.1/valid/text_shape DELETED
@@ -1,5 +0,0 @@
1
- Hoogelaandsters-0001 111
2
- Hoogelaandsters-0269 86
3
- Hoogelaandsters-0068 91
4
- Hoogelaandsters-0135 62
5
- Hoogelaandsters-0202 75
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10.log DELETED
@@ -1,1152 +0,0 @@
1
- # python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.10.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.10.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
2
- # Started at Fri Dec 1 15:58:34 UTC 2023
3
- #
4
- /data2/p280965/tts/espnet/tools/venv/bin/python3 /data2/p280965/tts/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.10.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.10.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
5
- [wieling-3-a100] 2023-12-01 15:58:40,202 (gan_tts:293) INFO: Vocabulary size: 46
6
- [wieling-3-a100] 2023-12-01 15:58:40,315 (encoder:174) INFO: encoder self-attention layer type = relative self-attention
7
- /data2/p280965/tts/espnet/tools/venv/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
8
- warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
9
- /data2/p280965/tts/espnet/espnet2/gan_tts/vits/monotonic_align/__init__.py:19: UserWarning: Cython version is not available. Fallback to 'EXPERIMETAL' numba version. If you want to use the cython version, please build it as follows: `cd espnet2/gan_tts/vits/monotonic_align; python setup.py build_ext --inplace`
10
- warnings.warn(
11
- [wieling-3-a100] 2023-12-01 15:58:41,727 (abs_task:1268) INFO: pytorch.version=2.1.0+cu121, cuda.available=True, cudnn.version=8902, cudnn.benchmark=False, cudnn.deterministic=False
12
- [wieling-3-a100] 2023-12-01 15:58:41,759 (abs_task:1269) INFO: Model structure:
13
- ESPnetGANTTSModel(
14
- (feats_extract): LogMelFbank(
15
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
16
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=80, fmax=7600, htk=False)
17
- )
18
- (tts): VITS(
19
- (generator): VITSGenerator(
20
- (text_encoder): TextEncoder(
21
- (emb): Embedding(46, 192)
22
- (encoder): Encoder(
23
- (embed): Sequential(
24
- (0): RelPositionalEncoding(
25
- (dropout): Dropout(p=0.0, inplace=False)
26
- )
27
- )
28
- (encoders): MultiSequential(
29
- (0): EncoderLayer(
30
- (self_attn): RelPositionMultiHeadedAttention(
31
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
32
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
33
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
34
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
35
- (dropout): Dropout(p=0.1, inplace=False)
36
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
37
- )
38
- (feed_forward): MultiLayeredConv1d(
39
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
40
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
41
- (dropout): Dropout(p=0.1, inplace=False)
42
- )
43
- (feed_forward_macaron): MultiLayeredConv1d(
44
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
45
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
46
- (dropout): Dropout(p=0.1, inplace=False)
47
- )
48
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
49
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
50
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
51
- (dropout): Dropout(p=0.1, inplace=False)
52
- )
53
- (1): EncoderLayer(
54
- (self_attn): RelPositionMultiHeadedAttention(
55
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
56
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
57
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
58
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
59
- (dropout): Dropout(p=0.1, inplace=False)
60
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
61
- )
62
- (feed_forward): MultiLayeredConv1d(
63
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
64
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
65
- (dropout): Dropout(p=0.1, inplace=False)
66
- )
67
- (feed_forward_macaron): MultiLayeredConv1d(
68
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
69
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
70
- (dropout): Dropout(p=0.1, inplace=False)
71
- )
72
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
73
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
74
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
75
- (dropout): Dropout(p=0.1, inplace=False)
76
- )
77
- (2): EncoderLayer(
78
- (self_attn): RelPositionMultiHeadedAttention(
79
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
80
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
81
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
82
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
83
- (dropout): Dropout(p=0.1, inplace=False)
84
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
85
- )
86
- (feed_forward): MultiLayeredConv1d(
87
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
88
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
89
- (dropout): Dropout(p=0.1, inplace=False)
90
- )
91
- (feed_forward_macaron): MultiLayeredConv1d(
92
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
93
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
94
- (dropout): Dropout(p=0.1, inplace=False)
95
- )
96
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
97
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
98
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
99
- (dropout): Dropout(p=0.1, inplace=False)
100
- )
101
- (3): EncoderLayer(
102
- (self_attn): RelPositionMultiHeadedAttention(
103
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
104
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
105
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
106
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
107
- (dropout): Dropout(p=0.1, inplace=False)
108
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
109
- )
110
- (feed_forward): MultiLayeredConv1d(
111
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
112
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
113
- (dropout): Dropout(p=0.1, inplace=False)
114
- )
115
- (feed_forward_macaron): MultiLayeredConv1d(
116
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
117
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
118
- (dropout): Dropout(p=0.1, inplace=False)
119
- )
120
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
121
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
122
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
123
- (dropout): Dropout(p=0.1, inplace=False)
124
- )
125
- (4): EncoderLayer(
126
- (self_attn): RelPositionMultiHeadedAttention(
127
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
128
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
129
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
130
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
131
- (dropout): Dropout(p=0.1, inplace=False)
132
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
133
- )
134
- (feed_forward): MultiLayeredConv1d(
135
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
136
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
137
- (dropout): Dropout(p=0.1, inplace=False)
138
- )
139
- (feed_forward_macaron): MultiLayeredConv1d(
140
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
141
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
142
- (dropout): Dropout(p=0.1, inplace=False)
143
- )
144
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
145
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
146
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
147
- (dropout): Dropout(p=0.1, inplace=False)
148
- )
149
- (5): EncoderLayer(
150
- (self_attn): RelPositionMultiHeadedAttention(
151
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
152
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
153
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
154
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
155
- (dropout): Dropout(p=0.1, inplace=False)
156
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
157
- )
158
- (feed_forward): MultiLayeredConv1d(
159
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
160
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
161
- (dropout): Dropout(p=0.1, inplace=False)
162
- )
163
- (feed_forward_macaron): MultiLayeredConv1d(
164
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
165
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
166
- (dropout): Dropout(p=0.1, inplace=False)
167
- )
168
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
169
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
170
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
171
- (dropout): Dropout(p=0.1, inplace=False)
172
- )
173
- )
174
- (after_norm): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
175
- )
176
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
177
- )
178
- (decoder): HiFiGANGenerator(
179
- (input_conv): Conv1d(192, 512, kernel_size=(7,), stride=(1,), padding=(3,))
180
- (upsamples): ModuleList(
181
- (0): Sequential(
182
- (0): LeakyReLU(negative_slope=0.1)
183
- (1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
184
- )
185
- (1): Sequential(
186
- (0): LeakyReLU(negative_slope=0.1)
187
- (1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
188
- )
189
- (2): Sequential(
190
- (0): LeakyReLU(negative_slope=0.1)
191
- (1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
192
- )
193
- (3): Sequential(
194
- (0): LeakyReLU(negative_slope=0.1)
195
- (1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
196
- )
197
- )
198
- (blocks): ModuleList(
199
- (0): ResidualBlock(
200
- (convs1): ModuleList(
201
- (0): Sequential(
202
- (0): LeakyReLU(negative_slope=0.1)
203
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
204
- )
205
- (1): Sequential(
206
- (0): LeakyReLU(negative_slope=0.1)
207
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
208
- )
209
- (2): Sequential(
210
- (0): LeakyReLU(negative_slope=0.1)
211
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
212
- )
213
- )
214
- (convs2): ModuleList(
215
- (0-2): 3 x Sequential(
216
- (0): LeakyReLU(negative_slope=0.1)
217
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
218
- )
219
- )
220
- )
221
- (1): ResidualBlock(
222
- (convs1): ModuleList(
223
- (0): Sequential(
224
- (0): LeakyReLU(negative_slope=0.1)
225
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
226
- )
227
- (1): Sequential(
228
- (0): LeakyReLU(negative_slope=0.1)
229
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
230
- )
231
- (2): Sequential(
232
- (0): LeakyReLU(negative_slope=0.1)
233
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
234
- )
235
- )
236
- (convs2): ModuleList(
237
- (0-2): 3 x Sequential(
238
- (0): LeakyReLU(negative_slope=0.1)
239
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
240
- )
241
- )
242
- )
243
- (2): ResidualBlock(
244
- (convs1): ModuleList(
245
- (0): Sequential(
246
- (0): LeakyReLU(negative_slope=0.1)
247
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
248
- )
249
- (1): Sequential(
250
- (0): LeakyReLU(negative_slope=0.1)
251
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
252
- )
253
- (2): Sequential(
254
- (0): LeakyReLU(negative_slope=0.1)
255
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
256
- )
257
- )
258
- (convs2): ModuleList(
259
- (0-2): 3 x Sequential(
260
- (0): LeakyReLU(negative_slope=0.1)
261
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
262
- )
263
- )
264
- )
265
- (3): ResidualBlock(
266
- (convs1): ModuleList(
267
- (0): Sequential(
268
- (0): LeakyReLU(negative_slope=0.1)
269
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
270
- )
271
- (1): Sequential(
272
- (0): LeakyReLU(negative_slope=0.1)
273
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
274
- )
275
- (2): Sequential(
276
- (0): LeakyReLU(negative_slope=0.1)
277
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
278
- )
279
- )
280
- (convs2): ModuleList(
281
- (0-2): 3 x Sequential(
282
- (0): LeakyReLU(negative_slope=0.1)
283
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
284
- )
285
- )
286
- )
287
- (4): ResidualBlock(
288
- (convs1): ModuleList(
289
- (0): Sequential(
290
- (0): LeakyReLU(negative_slope=0.1)
291
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
292
- )
293
- (1): Sequential(
294
- (0): LeakyReLU(negative_slope=0.1)
295
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
296
- )
297
- (2): Sequential(
298
- (0): LeakyReLU(negative_slope=0.1)
299
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
300
- )
301
- )
302
- (convs2): ModuleList(
303
- (0-2): 3 x Sequential(
304
- (0): LeakyReLU(negative_slope=0.1)
305
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
306
- )
307
- )
308
- )
309
- (5): ResidualBlock(
310
- (convs1): ModuleList(
311
- (0): Sequential(
312
- (0): LeakyReLU(negative_slope=0.1)
313
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
314
- )
315
- (1): Sequential(
316
- (0): LeakyReLU(negative_slope=0.1)
317
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
318
- )
319
- (2): Sequential(
320
- (0): LeakyReLU(negative_slope=0.1)
321
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
322
- )
323
- )
324
- (convs2): ModuleList(
325
- (0-2): 3 x Sequential(
326
- (0): LeakyReLU(negative_slope=0.1)
327
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
328
- )
329
- )
330
- )
331
- (6): ResidualBlock(
332
- (convs1): ModuleList(
333
- (0): Sequential(
334
- (0): LeakyReLU(negative_slope=0.1)
335
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
336
- )
337
- (1): Sequential(
338
- (0): LeakyReLU(negative_slope=0.1)
339
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
340
- )
341
- (2): Sequential(
342
- (0): LeakyReLU(negative_slope=0.1)
343
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
344
- )
345
- )
346
- (convs2): ModuleList(
347
- (0-2): 3 x Sequential(
348
- (0): LeakyReLU(negative_slope=0.1)
349
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
350
- )
351
- )
352
- )
353
- (7): ResidualBlock(
354
- (convs1): ModuleList(
355
- (0): Sequential(
356
- (0): LeakyReLU(negative_slope=0.1)
357
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
358
- )
359
- (1): Sequential(
360
- (0): LeakyReLU(negative_slope=0.1)
361
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
362
- )
363
- (2): Sequential(
364
- (0): LeakyReLU(negative_slope=0.1)
365
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
366
- )
367
- )
368
- (convs2): ModuleList(
369
- (0-2): 3 x Sequential(
370
- (0): LeakyReLU(negative_slope=0.1)
371
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
372
- )
373
- )
374
- )
375
- (8): ResidualBlock(
376
- (convs1): ModuleList(
377
- (0): Sequential(
378
- (0): LeakyReLU(negative_slope=0.1)
379
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
380
- )
381
- (1): Sequential(
382
- (0): LeakyReLU(negative_slope=0.1)
383
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
384
- )
385
- (2): Sequential(
386
- (0): LeakyReLU(negative_slope=0.1)
387
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
388
- )
389
- )
390
- (convs2): ModuleList(
391
- (0-2): 3 x Sequential(
392
- (0): LeakyReLU(negative_slope=0.1)
393
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
394
- )
395
- )
396
- )
397
- (9): ResidualBlock(
398
- (convs1): ModuleList(
399
- (0): Sequential(
400
- (0): LeakyReLU(negative_slope=0.1)
401
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
402
- )
403
- (1): Sequential(
404
- (0): LeakyReLU(negative_slope=0.1)
405
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
406
- )
407
- (2): Sequential(
408
- (0): LeakyReLU(negative_slope=0.1)
409
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
410
- )
411
- )
412
- (convs2): ModuleList(
413
- (0-2): 3 x Sequential(
414
- (0): LeakyReLU(negative_slope=0.1)
415
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
416
- )
417
- )
418
- )
419
- (10): ResidualBlock(
420
- (convs1): ModuleList(
421
- (0): Sequential(
422
- (0): LeakyReLU(negative_slope=0.1)
423
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
424
- )
425
- (1): Sequential(
426
- (0): LeakyReLU(negative_slope=0.1)
427
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
428
- )
429
- (2): Sequential(
430
- (0): LeakyReLU(negative_slope=0.1)
431
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
432
- )
433
- )
434
- (convs2): ModuleList(
435
- (0-2): 3 x Sequential(
436
- (0): LeakyReLU(negative_slope=0.1)
437
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
438
- )
439
- )
440
- )
441
- (11): ResidualBlock(
442
- (convs1): ModuleList(
443
- (0): Sequential(
444
- (0): LeakyReLU(negative_slope=0.1)
445
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
446
- )
447
- (1): Sequential(
448
- (0): LeakyReLU(negative_slope=0.1)
449
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
450
- )
451
- (2): Sequential(
452
- (0): LeakyReLU(negative_slope=0.1)
453
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
454
- )
455
- )
456
- (convs2): ModuleList(
457
- (0-2): 3 x Sequential(
458
- (0): LeakyReLU(negative_slope=0.1)
459
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
460
- )
461
- )
462
- )
463
- )
464
- (output_conv): Sequential(
465
- (0): LeakyReLU(negative_slope=0.01)
466
- (1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
467
- (2): Tanh()
468
- )
469
- (global_conv): Conv1d(256, 512, kernel_size=(1,), stride=(1,))
470
- )
471
- (posterior_encoder): PosteriorEncoder(
472
- (input_conv): Conv1d(80, 192, kernel_size=(1,), stride=(1,))
473
- (encoder): WaveNet(
474
- (conv_layers): ModuleList(
475
- (0-15): 16 x ResidualBlock(
476
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
477
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
478
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
479
- )
480
- )
481
- )
482
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
483
- )
484
- (flow): ResidualAffineCouplingBlock(
485
- (flows): ModuleList(
486
- (0): ResidualAffineCouplingLayer(
487
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
488
- (encoder): WaveNet(
489
- (conv_layers): ModuleList(
490
- (0-3): 4 x ResidualBlock(
491
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
492
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
493
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
494
- )
495
- )
496
- )
497
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
498
- )
499
- (1): FlipFlow()
500
- (2): ResidualAffineCouplingLayer(
501
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
502
- (encoder): WaveNet(
503
- (conv_layers): ModuleList(
504
- (0-3): 4 x ResidualBlock(
505
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
506
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
507
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
508
- )
509
- )
510
- )
511
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
512
- )
513
- (3): FlipFlow()
514
- (4): ResidualAffineCouplingLayer(
515
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
516
- (encoder): WaveNet(
517
- (conv_layers): ModuleList(
518
- (0-3): 4 x ResidualBlock(
519
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
520
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
521
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
522
- )
523
- )
524
- )
525
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
526
- )
527
- (5): FlipFlow()
528
- (6): ResidualAffineCouplingLayer(
529
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
530
- (encoder): WaveNet(
531
- (conv_layers): ModuleList(
532
- (0-3): 4 x ResidualBlock(
533
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
534
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
535
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
536
- )
537
- )
538
- )
539
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
540
- )
541
- (7): FlipFlow()
542
- )
543
- )
544
- (duration_predictor): StochasticDurationPredictor(
545
- (pre): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
546
- (dds): DilatedDepthSeparableConv(
547
- (convs): ModuleList(
548
- (0): Sequential(
549
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
550
- (1): Transpose()
551
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
552
- (3): Transpose()
553
- (4): GELU(approximate='none')
554
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
555
- (6): Transpose()
556
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
557
- (8): Transpose()
558
- (9): GELU(approximate='none')
559
- (10): Dropout(p=0.5, inplace=False)
560
- )
561
- (1): Sequential(
562
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
563
- (1): Transpose()
564
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
565
- (3): Transpose()
566
- (4): GELU(approximate='none')
567
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
568
- (6): Transpose()
569
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
570
- (8): Transpose()
571
- (9): GELU(approximate='none')
572
- (10): Dropout(p=0.5, inplace=False)
573
- )
574
- (2): Sequential(
575
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
576
- (1): Transpose()
577
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
578
- (3): Transpose()
579
- (4): GELU(approximate='none')
580
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
581
- (6): Transpose()
582
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
583
- (8): Transpose()
584
- (9): GELU(approximate='none')
585
- (10): Dropout(p=0.5, inplace=False)
586
- )
587
- )
588
- )
589
- (proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
590
- (log_flow): LogFlow()
591
- (flows): ModuleList(
592
- (0): ElementwiseAffineFlow()
593
- (1): ConvFlow(
594
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
595
- (dds_conv): DilatedDepthSeparableConv(
596
- (convs): ModuleList(
597
- (0): Sequential(
598
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
599
- (1): Transpose()
600
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
601
- (3): Transpose()
602
- (4): GELU(approximate='none')
603
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
604
- (6): Transpose()
605
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
606
- (8): Transpose()
607
- (9): GELU(approximate='none')
608
- (10): Dropout(p=0.0, inplace=False)
609
- )
610
- (1): Sequential(
611
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
612
- (1): Transpose()
613
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
614
- (3): Transpose()
615
- (4): GELU(approximate='none')
616
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
617
- (6): Transpose()
618
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
619
- (8): Transpose()
620
- (9): GELU(approximate='none')
621
- (10): Dropout(p=0.0, inplace=False)
622
- )
623
- (2): Sequential(
624
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
625
- (1): Transpose()
626
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
627
- (3): Transpose()
628
- (4): GELU(approximate='none')
629
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
630
- (6): Transpose()
631
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
632
- (8): Transpose()
633
- (9): GELU(approximate='none')
634
- (10): Dropout(p=0.0, inplace=False)
635
- )
636
- )
637
- )
638
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
639
- )
640
- (2): FlipFlow()
641
- (3): ConvFlow(
642
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
643
- (dds_conv): DilatedDepthSeparableConv(
644
- (convs): ModuleList(
645
- (0): Sequential(
646
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
647
- (1): Transpose()
648
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
649
- (3): Transpose()
650
- (4): GELU(approximate='none')
651
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
652
- (6): Transpose()
653
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
654
- (8): Transpose()
655
- (9): GELU(approximate='none')
656
- (10): Dropout(p=0.0, inplace=False)
657
- )
658
- (1): Sequential(
659
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
660
- (1): Transpose()
661
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
662
- (3): Transpose()
663
- (4): GELU(approximate='none')
664
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
665
- (6): Transpose()
666
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
667
- (8): Transpose()
668
- (9): GELU(approximate='none')
669
- (10): Dropout(p=0.0, inplace=False)
670
- )
671
- (2): Sequential(
672
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
673
- (1): Transpose()
674
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
675
- (3): Transpose()
676
- (4): GELU(approximate='none')
677
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
678
- (6): Transpose()
679
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
680
- (8): Transpose()
681
- (9): GELU(approximate='none')
682
- (10): Dropout(p=0.0, inplace=False)
683
- )
684
- )
685
- )
686
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
687
- )
688
- (4): FlipFlow()
689
- (5): ConvFlow(
690
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
691
- (dds_conv): DilatedDepthSeparableConv(
692
- (convs): ModuleList(
693
- (0): Sequential(
694
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
695
- (1): Transpose()
696
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
697
- (3): Transpose()
698
- (4): GELU(approximate='none')
699
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
700
- (6): Transpose()
701
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
702
- (8): Transpose()
703
- (9): GELU(approximate='none')
704
- (10): Dropout(p=0.0, inplace=False)
705
- )
706
- (1): Sequential(
707
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
708
- (1): Transpose()
709
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
710
- (3): Transpose()
711
- (4): GELU(approximate='none')
712
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
713
- (6): Transpose()
714
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
715
- (8): Transpose()
716
- (9): GELU(approximate='none')
717
- (10): Dropout(p=0.0, inplace=False)
718
- )
719
- (2): Sequential(
720
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
721
- (1): Transpose()
722
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
723
- (3): Transpose()
724
- (4): GELU(approximate='none')
725
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
726
- (6): Transpose()
727
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
728
- (8): Transpose()
729
- (9): GELU(approximate='none')
730
- (10): Dropout(p=0.0, inplace=False)
731
- )
732
- )
733
- )
734
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
735
- )
736
- (6): FlipFlow()
737
- (7): ConvFlow(
738
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
739
- (dds_conv): DilatedDepthSeparableConv(
740
- (convs): ModuleList(
741
- (0): Sequential(
742
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
743
- (1): Transpose()
744
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
745
- (3): Transpose()
746
- (4): GELU(approximate='none')
747
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
748
- (6): Transpose()
749
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
750
- (8): Transpose()
751
- (9): GELU(approximate='none')
752
- (10): Dropout(p=0.0, inplace=False)
753
- )
754
- (1): Sequential(
755
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
756
- (1): Transpose()
757
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
758
- (3): Transpose()
759
- (4): GELU(approximate='none')
760
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
761
- (6): Transpose()
762
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
763
- (8): Transpose()
764
- (9): GELU(approximate='none')
765
- (10): Dropout(p=0.0, inplace=False)
766
- )
767
- (2): Sequential(
768
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
769
- (1): Transpose()
770
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
771
- (3): Transpose()
772
- (4): GELU(approximate='none')
773
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
774
- (6): Transpose()
775
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
776
- (8): Transpose()
777
- (9): GELU(approximate='none')
778
- (10): Dropout(p=0.0, inplace=False)
779
- )
780
- )
781
- )
782
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
783
- )
784
- (8): FlipFlow()
785
- )
786
- (post_pre): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
787
- (post_dds): DilatedDepthSeparableConv(
788
- (convs): ModuleList(
789
- (0): Sequential(
790
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
791
- (1): Transpose()
792
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
793
- (3): Transpose()
794
- (4): GELU(approximate='none')
795
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
796
- (6): Transpose()
797
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
798
- (8): Transpose()
799
- (9): GELU(approximate='none')
800
- (10): Dropout(p=0.5, inplace=False)
801
- )
802
- (1): Sequential(
803
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
804
- (1): Transpose()
805
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
806
- (3): Transpose()
807
- (4): GELU(approximate='none')
808
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
809
- (6): Transpose()
810
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
811
- (8): Transpose()
812
- (9): GELU(approximate='none')
813
- (10): Dropout(p=0.5, inplace=False)
814
- )
815
- (2): Sequential(
816
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
817
- (1): Transpose()
818
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
819
- (3): Transpose()
820
- (4): GELU(approximate='none')
821
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
822
- (6): Transpose()
823
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
824
- (8): Transpose()
825
- (9): GELU(approximate='none')
826
- (10): Dropout(p=0.5, inplace=False)
827
- )
828
- )
829
- )
830
- (post_proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
831
- (post_flows): ModuleList(
832
- (0): ElementwiseAffineFlow()
833
- (1): ConvFlow(
834
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
835
- (dds_conv): DilatedDepthSeparableConv(
836
- (convs): ModuleList(
837
- (0): Sequential(
838
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
839
- (1): Transpose()
840
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
841
- (3): Transpose()
842
- (4): GELU(approximate='none')
843
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
844
- (6): Transpose()
845
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
846
- (8): Transpose()
847
- (9): GELU(approximate='none')
848
- (10): Dropout(p=0.0, inplace=False)
849
- )
850
- (1): Sequential(
851
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
852
- (1): Transpose()
853
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
854
- (3): Transpose()
855
- (4): GELU(approximate='none')
856
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
857
- (6): Transpose()
858
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
859
- (8): Transpose()
860
- (9): GELU(approximate='none')
861
- (10): Dropout(p=0.0, inplace=False)
862
- )
863
- (2): Sequential(
864
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
865
- (1): Transpose()
866
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
867
- (3): Transpose()
868
- (4): GELU(approximate='none')
869
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
870
- (6): Transpose()
871
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
872
- (8): Transpose()
873
- (9): GELU(approximate='none')
874
- (10): Dropout(p=0.0, inplace=False)
875
- )
876
- )
877
- )
878
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
879
- )
880
- (2): FlipFlow()
881
- (3): ConvFlow(
882
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
883
- (dds_conv): DilatedDepthSeparableConv(
884
- (convs): ModuleList(
885
- (0): Sequential(
886
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
887
- (1): Transpose()
888
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
889
- (3): Transpose()
890
- (4): GELU(approximate='none')
891
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
892
- (6): Transpose()
893
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
894
- (8): Transpose()
895
- (9): GELU(approximate='none')
896
- (10): Dropout(p=0.0, inplace=False)
897
- )
898
- (1): Sequential(
899
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
900
- (1): Transpose()
901
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
902
- (3): Transpose()
903
- (4): GELU(approximate='none')
904
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
905
- (6): Transpose()
906
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
907
- (8): Transpose()
908
- (9): GELU(approximate='none')
909
- (10): Dropout(p=0.0, inplace=False)
910
- )
911
- (2): Sequential(
912
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
913
- (1): Transpose()
914
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
915
- (3): Transpose()
916
- (4): GELU(approximate='none')
917
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
918
- (6): Transpose()
919
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
920
- (8): Transpose()
921
- (9): GELU(approximate='none')
922
- (10): Dropout(p=0.0, inplace=False)
923
- )
924
- )
925
- )
926
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
927
- )
928
- (4): FlipFlow()
929
- (5): ConvFlow(
930
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
931
- (dds_conv): DilatedDepthSeparableConv(
932
- (convs): ModuleList(
933
- (0): Sequential(
934
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
935
- (1): Transpose()
936
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
937
- (3): Transpose()
938
- (4): GELU(approximate='none')
939
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
940
- (6): Transpose()
941
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
942
- (8): Transpose()
943
- (9): GELU(approximate='none')
944
- (10): Dropout(p=0.0, inplace=False)
945
- )
946
- (1): Sequential(
947
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
948
- (1): Transpose()
949
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
950
- (3): Transpose()
951
- (4): GELU(approximate='none')
952
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
953
- (6): Transpose()
954
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
955
- (8): Transpose()
956
- (9): GELU(approximate='none')
957
- (10): Dropout(p=0.0, inplace=False)
958
- )
959
- (2): Sequential(
960
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
961
- (1): Transpose()
962
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
963
- (3): Transpose()
964
- (4): GELU(approximate='none')
965
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
966
- (6): Transpose()
967
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
968
- (8): Transpose()
969
- (9): GELU(approximate='none')
970
- (10): Dropout(p=0.0, inplace=False)
971
- )
972
- )
973
- )
974
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
975
- )
976
- (6): FlipFlow()
977
- (7): ConvFlow(
978
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
979
- (dds_conv): DilatedDepthSeparableConv(
980
- (convs): ModuleList(
981
- (0): Sequential(
982
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
983
- (1): Transpose()
984
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
985
- (3): Transpose()
986
- (4): GELU(approximate='none')
987
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
988
- (6): Transpose()
989
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
990
- (8): Transpose()
991
- (9): GELU(approximate='none')
992
- (10): Dropout(p=0.0, inplace=False)
993
- )
994
- (1): Sequential(
995
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
996
- (1): Transpose()
997
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
998
- (3): Transpose()
999
- (4): GELU(approximate='none')
1000
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1001
- (6): Transpose()
1002
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1003
- (8): Transpose()
1004
- (9): GELU(approximate='none')
1005
- (10): Dropout(p=0.0, inplace=False)
1006
- )
1007
- (2): Sequential(
1008
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
1009
- (1): Transpose()
1010
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1011
- (3): Transpose()
1012
- (4): GELU(approximate='none')
1013
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1014
- (6): Transpose()
1015
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1016
- (8): Transpose()
1017
- (9): GELU(approximate='none')
1018
- (10): Dropout(p=0.0, inplace=False)
1019
- )
1020
- )
1021
- )
1022
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
1023
- )
1024
- (8): FlipFlow()
1025
- )
1026
- (global_conv): Conv1d(256, 192, kernel_size=(1,), stride=(1,))
1027
- )
1028
- (global_emb): Embedding(4, 256)
1029
- )
1030
- (discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
1031
- (msd): HiFiGANMultiScaleDiscriminator(
1032
- (discriminators): ModuleList(
1033
- (0): HiFiGANScaleDiscriminator(
1034
- (layers): ModuleList(
1035
- (0): Sequential(
1036
- (0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
1037
- (1): LeakyReLU(negative_slope=0.1)
1038
- )
1039
- (1): Sequential(
1040
- (0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
1041
- (1): LeakyReLU(negative_slope=0.1)
1042
- )
1043
- (2): Sequential(
1044
- (0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
1045
- (1): LeakyReLU(negative_slope=0.1)
1046
- )
1047
- (3): Sequential(
1048
- (0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1049
- (1): LeakyReLU(negative_slope=0.1)
1050
- )
1051
- (4): Sequential(
1052
- (0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1053
- (1): LeakyReLU(negative_slope=0.1)
1054
- )
1055
- (5): Sequential(
1056
- (0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
1057
- (1): LeakyReLU(negative_slope=0.1)
1058
- )
1059
- (6): Sequential(
1060
- (0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
1061
- (1): LeakyReLU(negative_slope=0.1)
1062
- )
1063
- (7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
1064
- )
1065
- )
1066
- )
1067
- )
1068
- (mpd): HiFiGANMultiPeriodDiscriminator(
1069
- (discriminators): ModuleList(
1070
- (0-4): 5 x HiFiGANPeriodDiscriminator(
1071
- (convs): ModuleList(
1072
- (0): Sequential(
1073
- (0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1074
- (1): LeakyReLU(negative_slope=0.1)
1075
- )
1076
- (1): Sequential(
1077
- (0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1078
- (1): LeakyReLU(negative_slope=0.1)
1079
- )
1080
- (2): Sequential(
1081
- (0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1082
- (1): LeakyReLU(negative_slope=0.1)
1083
- )
1084
- (3): Sequential(
1085
- (0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1086
- (1): LeakyReLU(negative_slope=0.1)
1087
- )
1088
- (4): Sequential(
1089
- (0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
1090
- (1): LeakyReLU(negative_slope=0.1)
1091
- )
1092
- )
1093
- (output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
1094
- )
1095
- )
1096
- )
1097
- )
1098
- (generator_adv_loss): GeneratorAdversarialLoss()
1099
- (discriminator_adv_loss): DiscriminatorAdversarialLoss()
1100
- (feat_match_loss): FeatureMatchLoss()
1101
- (mel_loss): MelSpectrogramLoss(
1102
- (wav_to_mel): LogMelFbank(
1103
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
1104
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=11025.0, htk=False)
1105
- )
1106
- )
1107
- (kl_loss): KLDivergenceLoss()
1108
- )
1109
- )
1110
-
1111
- Model summary:
1112
- Class Name: ESPnetGANTTSModel
1113
- Total Number of model parameters: 96.24 M
1114
- Number of trainable parameters: 96.24 M (100.0%)
1115
- Size: 384.96 MB
1116
- Type: torch.float32
1117
- [wieling-3-a100] 2023-12-01 15:58:41,759 (abs_task:1272) INFO: Optimizer:
1118
- AdamW (
1119
- Parameter Group 0
1120
- amsgrad: False
1121
- betas: [0.8, 0.99]
1122
- capturable: False
1123
- differentiable: False
1124
- eps: 1e-09
1125
- foreach: None
1126
- fused: None
1127
- initial_lr: 0.0003
1128
- lr: 0.0003
1129
- maximize: False
1130
- weight_decay: 0.0
1131
- )
1132
- [wieling-3-a100] 2023-12-01 15:58:41,759 (abs_task:1273) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7ffa7c9b4880>
1133
- [wieling-3-a100] 2023-12-01 15:58:41,759 (abs_task:1272) INFO: Optimizer2:
1134
- AdamW (
1135
- Parameter Group 0
1136
- amsgrad: False
1137
- betas: [0.8, 0.99]
1138
- capturable: False
1139
- differentiable: False
1140
- eps: 1e-09
1141
- foreach: None
1142
- fused: None
1143
- initial_lr: 0.0003
1144
- lr: 0.0003
1145
- maximize: False
1146
- weight_decay: 0.0
1147
- )
1148
- [wieling-3-a100] 2023-12-01 15:58:41,759 (abs_task:1273) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7ffa7c9b4820>
1149
- [wieling-3-a100] 2023-12-01 15:58:41,759 (abs_task:1282) INFO: Saving the configuration in exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/config.yaml
1150
- [wieling-3-a100] 2023-12-01 15:58:41,790 (abs_task:1293) INFO: Namespace(config='conf/train_vits.yaml', print_config=False, log_level='INFO', drop_last_iter=False, dry_run=False, iterator_type='sequence', valid_iterator_type=None, output_dir='exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10', ngpu=0, seed=67823, num_workers=4, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=True, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=False, collect_stats=True, write_collected_feats=False, max_epoch=1000, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[['train', 'total_count', 'max']], keep_nbest_models=10, nbest_averaging_interval=0, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=50, use_matplotlib=True, use_tensorboard=True, create_graph_in_tensorboard=False, use_wandb=True, wandb_project='GROTTS', wandb_id=None, wandb_entity=None, wandb_name='VITS_lr_3.0e-4', wandb_model_log_interval=-1, detect_anomaly=False, use_lora=False, save_lora_only=True, lora_conf={}, pretrain_path=None, init_param=['downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv'], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=1000, batch_size=40, valid_batch_size=None, batch_bins=10000000, valid_batch_bins=None, train_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.10.scp'], valid_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.10.scp'], batch_type='numel', valid_batch_type=None, fold_length=[], sort_in_batch='descending', shuffle_within_batch=False, sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, chunk_excluded_key_prefixes=[], chunk_default_fs=None, train_data_path_and_name_and_type=[('dump/raw/train_nodev/text', 'text', 'text'), ('dump/raw/train_nodev/wav.scp', 'speech', 'sound'), ('dump/raw/train_nodev/utt2sid', 'sids', 'text_int')], valid_data_path_and_name_and_type=[('dump/raw/train_dev/text', 'text', 'text'), ('dump/raw/train_dev/wav.scp', 'speech', 'sound'), ('dump/raw/train_dev/utt2sid', 'sids', 'text_int')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, allow_multi_rates=False, valid_max_cache_size=None, exclude_weight_decay=False, exclude_weight_decay_conf={}, optim='adamw', optim_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler='exponentiallr', scheduler_conf={'gamma': 0.999875}, optim2='adamw', optim2_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, generator_first=False, token_list=['<blank>', '<unk>', '<space>', 'e', 'n', 'a', 'o', 't', 'i', 'r', 'd', 's', 'k', 'l', 'm', 'u', 'g', 'h', 'w', 'v', '.', 'z', 'b', 'p', ',', 'j', 'c', 'f', '‘', '’', ':', '?', 'ö', "'", '!', '-', ';', 'ò', 'è', 'ì', 'é', 'y', 'ë', 'x', 'q', '<sos/eos>'], odim=None, model_conf={}, use_preprocessor=True, token_type='char', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, feats_extract='fbank', feats_extract_conf={'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'fs': 22050, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, normalize=None, normalize_conf={}, tts='vits', tts_conf={'generator_type': 'vits_generator', 'generator_params': {'hidden_channels': 192, 'spks': 4, 'global_channels': 256, 'segment_size': 32, 'text_encoder_attention_heads': 2, 'text_encoder_ffn_expand': 4, 'text_encoder_blocks': 6, 'text_encoder_positionwise_layer_type': 'conv1d', 'text_encoder_positionwise_conv_kernel_size': 3, 'text_encoder_positional_encoding_layer_type': 'rel_pos', 'text_encoder_self_attention_layer_type': 'rel_selfattn', 'text_encoder_activation_type': 'swish', 'text_encoder_normalize_before': True, 'text_encoder_dropout_rate': 0.1, 'text_encoder_positional_dropout_rate': 0.0, 'text_encoder_attention_dropout_rate': 0.1, 'use_macaron_style_in_text_encoder': True, 'use_conformer_conv_in_text_encoder': False, 'text_encoder_conformer_kernel_size': -1, 'decoder_kernel_size': 7, 'decoder_channels': 512, 'decoder_upsample_scales': [8, 8, 2, 2], 'decoder_upsample_kernel_sizes': [16, 16, 4, 4], 'decoder_resblock_kernel_sizes': [3, 7, 11], 'decoder_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'use_weight_norm_in_decoder': True, 'posterior_encoder_kernel_size': 5, 'posterior_encoder_layers': 16, 'posterior_encoder_stacks': 1, 'posterior_encoder_base_dilation': 1, 'posterior_encoder_dropout_rate': 0.0, 'use_weight_norm_in_posterior_encoder': True, 'flow_flows': 4, 'flow_kernel_size': 5, 'flow_base_dilation': 1, 'flow_layers': 4, 'flow_dropout_rate': 0.0, 'use_weight_norm_in_flow': True, 'use_only_mean_in_flow': True, 'stochastic_duration_predictor_kernel_size': 3, 'stochastic_duration_predictor_dropout_rate': 0.5, 'stochastic_duration_predictor_flows': 4, 'stochastic_duration_predictor_dds_conv_layers': 3, 'vocabs': 46, 'aux_channels': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': False, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_dur': 1.0, 'lambda_kl': 1.0, 'sampling_rate': 22050, 'cache_generator_outputs': True}, pitch_extract=None, pitch_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, energy_extract=None, energy_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None}, energy_normalize=None, energy_normalize_conf={}, required=['output_dir', 'token_list'], version='202310', distributed=False)
1151
- # Accounting: time=18 threads=1
1152
- # Ended (code 0) at Fri Dec 1 15:58:52 UTC 2023, elapsed time 18 seconds
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/config.yaml DELETED
@@ -1,383 +0,0 @@
1
- config: conf/train_vits.yaml
2
- print_config: false
3
- log_level: INFO
4
- drop_last_iter: false
5
- dry_run: false
6
- iterator_type: sequence
7
- valid_iterator_type: null
8
- output_dir: exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10
9
- ngpu: 0
10
- seed: 67823
11
- num_workers: 4
12
- num_att_plot: 3
13
- dist_backend: nccl
14
- dist_init_method: env://
15
- dist_world_size: null
16
- dist_rank: null
17
- local_rank: null
18
- dist_master_addr: null
19
- dist_master_port: null
20
- dist_launcher: null
21
- multiprocessing_distributed: false
22
- unused_parameters: true
23
- sharded_ddp: false
24
- cudnn_enabled: true
25
- cudnn_benchmark: false
26
- cudnn_deterministic: false
27
- collect_stats: true
28
- write_collected_feats: false
29
- max_epoch: 1000
30
- patience: null
31
- val_scheduler_criterion:
32
- - valid
33
- - loss
34
- early_stopping_criterion:
35
- - valid
36
- - loss
37
- - min
38
- best_model_criterion:
39
- - - train
40
- - total_count
41
- - max
42
- keep_nbest_models: 10
43
- nbest_averaging_interval: 0
44
- grad_clip: -1
45
- grad_clip_type: 2.0
46
- grad_noise: false
47
- accum_grad: 1
48
- no_forward_run: false
49
- resume: false
50
- train_dtype: float32
51
- use_amp: false
52
- log_interval: 50
53
- use_matplotlib: true
54
- use_tensorboard: true
55
- create_graph_in_tensorboard: false
56
- use_wandb: true
57
- wandb_project: GROTTS
58
- wandb_id: null
59
- wandb_entity: null
60
- wandb_name: VITS_lr_3.0e-4
61
- wandb_model_log_interval: -1
62
- detect_anomaly: false
63
- use_lora: false
64
- save_lora_only: true
65
- lora_conf: {}
66
- pretrain_path: null
67
- init_param:
68
- - downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv
69
- ignore_init_mismatch: false
70
- freeze_param: []
71
- num_iters_per_epoch: 1000
72
- batch_size: 40
73
- valid_batch_size: null
74
- batch_bins: 10000000
75
- valid_batch_bins: null
76
- train_shape_file:
77
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.10.scp
78
- valid_shape_file:
79
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.10.scp
80
- batch_type: numel
81
- valid_batch_type: null
82
- fold_length: []
83
- sort_in_batch: descending
84
- shuffle_within_batch: false
85
- sort_batch: descending
86
- multiple_iterator: false
87
- chunk_length: 500
88
- chunk_shift_ratio: 0.5
89
- num_cache_chunks: 1024
90
- chunk_excluded_key_prefixes: []
91
- chunk_default_fs: null
92
- train_data_path_and_name_and_type:
93
- - - dump/raw/train_nodev/text
94
- - text
95
- - text
96
- - - dump/raw/train_nodev/wav.scp
97
- - speech
98
- - sound
99
- - - dump/raw/train_nodev/utt2sid
100
- - sids
101
- - text_int
102
- valid_data_path_and_name_and_type:
103
- - - dump/raw/train_dev/text
104
- - text
105
- - text
106
- - - dump/raw/train_dev/wav.scp
107
- - speech
108
- - sound
109
- - - dump/raw/train_dev/utt2sid
110
- - sids
111
- - text_int
112
- allow_variable_data_keys: false
113
- max_cache_size: 0.0
114
- max_cache_fd: 32
115
- allow_multi_rates: false
116
- valid_max_cache_size: null
117
- exclude_weight_decay: false
118
- exclude_weight_decay_conf: {}
119
- optim: adamw
120
- optim_conf:
121
- lr: 0.0003
122
- betas:
123
- - 0.8
124
- - 0.99
125
- eps: 1.0e-09
126
- weight_decay: 0.0
127
- scheduler: exponentiallr
128
- scheduler_conf:
129
- gamma: 0.999875
130
- optim2: adamw
131
- optim2_conf:
132
- lr: 0.0003
133
- betas:
134
- - 0.8
135
- - 0.99
136
- eps: 1.0e-09
137
- weight_decay: 0.0
138
- scheduler2: exponentiallr
139
- scheduler2_conf:
140
- gamma: 0.999875
141
- generator_first: false
142
- token_list:
143
- - <blank>
144
- - <unk>
145
- - <space>
146
- - e
147
- - n
148
- - a
149
- - o
150
- - t
151
- - i
152
- - r
153
- - d
154
- - s
155
- - k
156
- - l
157
- - m
158
- - u
159
- - g
160
- - h
161
- - w
162
- - v
163
- - .
164
- - z
165
- - b
166
- - p
167
- - ','
168
- - j
169
- - c
170
- - f
171
- - ‘
172
- - ’
173
- - ':'
174
- - '?'
175
- - ö
176
- - ''''
177
- - '!'
178
- - '-'
179
- - ;
180
- - ò
181
- - è
182
- - ì
183
- - é
184
- - y
185
- - ë
186
- - x
187
- - q
188
- - <sos/eos>
189
- odim: null
190
- model_conf: {}
191
- use_preprocessor: true
192
- token_type: char
193
- bpemodel: null
194
- non_linguistic_symbols: null
195
- cleaner: null
196
- g2p: null
197
- feats_extract: fbank
198
- feats_extract_conf:
199
- n_fft: 1024
200
- hop_length: 256
201
- win_length: null
202
- fs: 22050
203
- fmin: 80
204
- fmax: 7600
205
- n_mels: 80
206
- normalize: null
207
- normalize_conf: {}
208
- tts: vits
209
- tts_conf:
210
- generator_type: vits_generator
211
- generator_params:
212
- hidden_channels: 192
213
- spks: 4
214
- global_channels: 256
215
- segment_size: 32
216
- text_encoder_attention_heads: 2
217
- text_encoder_ffn_expand: 4
218
- text_encoder_blocks: 6
219
- text_encoder_positionwise_layer_type: conv1d
220
- text_encoder_positionwise_conv_kernel_size: 3
221
- text_encoder_positional_encoding_layer_type: rel_pos
222
- text_encoder_self_attention_layer_type: rel_selfattn
223
- text_encoder_activation_type: swish
224
- text_encoder_normalize_before: true
225
- text_encoder_dropout_rate: 0.1
226
- text_encoder_positional_dropout_rate: 0.0
227
- text_encoder_attention_dropout_rate: 0.1
228
- use_macaron_style_in_text_encoder: true
229
- use_conformer_conv_in_text_encoder: false
230
- text_encoder_conformer_kernel_size: -1
231
- decoder_kernel_size: 7
232
- decoder_channels: 512
233
- decoder_upsample_scales:
234
- - 8
235
- - 8
236
- - 2
237
- - 2
238
- decoder_upsample_kernel_sizes:
239
- - 16
240
- - 16
241
- - 4
242
- - 4
243
- decoder_resblock_kernel_sizes:
244
- - 3
245
- - 7
246
- - 11
247
- decoder_resblock_dilations:
248
- - - 1
249
- - 3
250
- - 5
251
- - - 1
252
- - 3
253
- - 5
254
- - - 1
255
- - 3
256
- - 5
257
- use_weight_norm_in_decoder: true
258
- posterior_encoder_kernel_size: 5
259
- posterior_encoder_layers: 16
260
- posterior_encoder_stacks: 1
261
- posterior_encoder_base_dilation: 1
262
- posterior_encoder_dropout_rate: 0.0
263
- use_weight_norm_in_posterior_encoder: true
264
- flow_flows: 4
265
- flow_kernel_size: 5
266
- flow_base_dilation: 1
267
- flow_layers: 4
268
- flow_dropout_rate: 0.0
269
- use_weight_norm_in_flow: true
270
- use_only_mean_in_flow: true
271
- stochastic_duration_predictor_kernel_size: 3
272
- stochastic_duration_predictor_dropout_rate: 0.5
273
- stochastic_duration_predictor_flows: 4
274
- stochastic_duration_predictor_dds_conv_layers: 3
275
- vocabs: 46
276
- aux_channels: 80
277
- discriminator_type: hifigan_multi_scale_multi_period_discriminator
278
- discriminator_params:
279
- scales: 1
280
- scale_downsample_pooling: AvgPool1d
281
- scale_downsample_pooling_params:
282
- kernel_size: 4
283
- stride: 2
284
- padding: 2
285
- scale_discriminator_params:
286
- in_channels: 1
287
- out_channels: 1
288
- kernel_sizes:
289
- - 15
290
- - 41
291
- - 5
292
- - 3
293
- channels: 128
294
- max_downsample_channels: 1024
295
- max_groups: 16
296
- bias: true
297
- downsample_scales:
298
- - 2
299
- - 2
300
- - 4
301
- - 4
302
- - 1
303
- nonlinear_activation: LeakyReLU
304
- nonlinear_activation_params:
305
- negative_slope: 0.1
306
- use_weight_norm: false
307
- use_spectral_norm: false
308
- follow_official_norm: false
309
- periods:
310
- - 2
311
- - 3
312
- - 5
313
- - 7
314
- - 11
315
- period_discriminator_params:
316
- in_channels: 1
317
- out_channels: 1
318
- kernel_sizes:
319
- - 5
320
- - 3
321
- channels: 32
322
- downsample_scales:
323
- - 3
324
- - 3
325
- - 3
326
- - 3
327
- - 1
328
- max_downsample_channels: 1024
329
- bias: true
330
- nonlinear_activation: LeakyReLU
331
- nonlinear_activation_params:
332
- negative_slope: 0.1
333
- use_weight_norm: true
334
- use_spectral_norm: false
335
- generator_adv_loss_params:
336
- average_by_discriminators: false
337
- loss_type: mse
338
- discriminator_adv_loss_params:
339
- average_by_discriminators: false
340
- loss_type: mse
341
- feat_match_loss_params:
342
- average_by_discriminators: false
343
- average_by_layers: false
344
- include_final_outputs: true
345
- mel_loss_params:
346
- fs: 22050
347
- n_fft: 1024
348
- hop_length: 256
349
- win_length: null
350
- window: hann
351
- n_mels: 80
352
- fmin: 0
353
- fmax: null
354
- log_base: null
355
- lambda_adv: 1.0
356
- lambda_mel: 45.0
357
- lambda_feat_match: 2.0
358
- lambda_dur: 1.0
359
- lambda_kl: 1.0
360
- sampling_rate: 22050
361
- cache_generator_outputs: true
362
- pitch_extract: null
363
- pitch_extract_conf:
364
- fs: 22050
365
- n_fft: 1024
366
- hop_length: 256
367
- f0max: 400
368
- f0min: 80
369
- pitch_normalize: null
370
- pitch_normalize_conf: {}
371
- energy_extract: null
372
- energy_extract_conf:
373
- fs: 22050
374
- n_fft: 1024
375
- hop_length: 256
376
- win_length: null
377
- energy_normalize: null
378
- energy_normalize_conf: {}
379
- required:
380
- - output_dir
381
- - token_list
382
- version: '202310'
383
- distributed: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/batch_keys DELETED
@@ -1,3 +0,0 @@
1
- text
2
- speech
3
- sids
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/feats_lengths_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:af79191d13bfcc12aecb8430b36098d2678e2934adbe5d1a061dee1deef5773e
3
- size 778
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/feats_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:574f2692fe20cefd55fe03010ac1815121a4aa53069aa042c591fb317d7804b0
3
- size 1402
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/sids_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-2288-MoanMorn 1
2
- Hoogelaandsters-2294-MoanMorn 1
3
- Hoogelaandsters-2299-MoanMorn 1
4
- Hoogelaandsters-2303-MoanMorn 1
5
- Hoogelaandsters-2307-MoanMorn 1
6
- Hoogelaandsters-2311-MoanMorn 1
7
- Hoogelaandsters-2316-MoanMorn 1
8
- Hoogelaandsters-2320-MoanMorn 1
9
- Hoogelaandsters-2324-MoanMorn 1
10
- Hoogelaandsters-2328-MoanMorn 1
11
- Hoogelaandsters-2332-MoanMorn 1
12
- Hoogelaandsters-2336-MoanMorn 1
13
- Hoogelaandsters-2340-MoanMorn 1
14
- Hoogelaandsters-2344-MoanMorn 1
15
- Hoogelaandsters-2348-MoanMorn 1
16
- Hoogelaandsters-2352-MoanMorn 1
17
- Hoogelaandsters-2356-MoanMorn 1
18
- Hoogelaandsters-2361-MoanMorn 1
19
- Hoogelaandsters-2365-MoanMorn 1
20
- Hoogelaandsters-2369-MoanMorn 1
21
- Hoogelaandsters-2373-MoanMorn 1
22
- Hoogelaandsters-2377-MoanMorn 1
23
- Hoogelaandsters-2381-MoanMorn 1
24
- Hoogelaandsters-2386-MoanMorn 1
25
- Hoogelaandsters-2390-MoanMorn 1
26
- Hoogelaandsters-2395-MoanMorn 1
27
- Hoogelaandsters-2399-MoanMorn 1
28
- Hoogelaandsters-2403-MoanMorn 1
29
- Hoogelaandsters-2407-MoanMorn 1
30
- Hoogelaandsters-2411-MoanMorn 1
31
- Hoogelaandsters-2418-MoanMorn 1
32
- Hoogelaandsters-2422-MoanMorn 1
33
- Hoogelaandsters-2426-MoanMorn 1
34
- Hoogelaandsters-2431-MoanMorn 1
35
- Hoogelaandsters-2435-MoanMorn 1
36
- Hoogelaandsters-2440-MoanMorn 1
37
- Hoogelaandsters-2444-MoanMorn 1
38
- Hoogelaandsters-2448-MoanMorn 1
39
- Hoogelaandsters-2452-MoanMorn 1
40
- Hoogelaandsters-2456-MoanMorn 1
41
- Hoogelaandsters-2289-MoanMorn 1
42
- Hoogelaandsters-2295-MoanMorn 1
43
- Hoogelaandsters-2300-MoanMorn 1
44
- Hoogelaandsters-2304-MoanMorn 1
45
- Hoogelaandsters-2308-MoanMorn 1
46
- Hoogelaandsters-2312-MoanMorn 1
47
- Hoogelaandsters-2317-MoanMorn 1
48
- Hoogelaandsters-2321-MoanMorn 1
49
- Hoogelaandsters-2325-MoanMorn 1
50
- Hoogelaandsters-2329-MoanMorn 1
51
- Hoogelaandsters-2333-MoanMorn 1
52
- Hoogelaandsters-2337-MoanMorn 1
53
- Hoogelaandsters-2341-MoanMorn 1
54
- Hoogelaandsters-2345-MoanMorn 1
55
- Hoogelaandsters-2349-MoanMorn 1
56
- Hoogelaandsters-2353-MoanMorn 1
57
- Hoogelaandsters-2358-MoanMorn 1
58
- Hoogelaandsters-2362-MoanMorn 1
59
- Hoogelaandsters-2366-MoanMorn 1
60
- Hoogelaandsters-2370-MoanMorn 1
61
- Hoogelaandsters-2374-MoanMorn 1
62
- Hoogelaandsters-2378-MoanMorn 1
63
- Hoogelaandsters-2382-MoanMorn 1
64
- Hoogelaandsters-2387-MoanMorn 1
65
- Hoogelaandsters-2391-MoanMorn 1
66
- Hoogelaandsters-2396-MoanMorn 1
67
- Hoogelaandsters-2400-MoanMorn 1
68
- Hoogelaandsters-2404-MoanMorn 1
69
- Hoogelaandsters-2408-MoanMorn 1
70
- Hoogelaandsters-2412-MoanMorn 1
71
- Hoogelaandsters-2419-MoanMorn 1
72
- Hoogelaandsters-2423-MoanMorn 1
73
- Hoogelaandsters-2427-MoanMorn 1
74
- Hoogelaandsters-2432-MoanMorn 1
75
- Hoogelaandsters-2437-MoanMorn 1
76
- Hoogelaandsters-2441-MoanMorn 1
77
- Hoogelaandsters-2445-MoanMorn 1
78
- Hoogelaandsters-2449-MoanMorn 1
79
- Hoogelaandsters-2453-MoanMorn 1
80
- Hoogelaandsters-2457-MoanMorn 1
81
- Hoogelaandsters-2291-MoanMorn 1
82
- Hoogelaandsters-2297-MoanMorn 1
83
- Hoogelaandsters-2301-MoanMorn 1
84
- Hoogelaandsters-2305-MoanMorn 1
85
- Hoogelaandsters-2309-MoanMorn 1
86
- Hoogelaandsters-2314-MoanMorn 1
87
- Hoogelaandsters-2318-MoanMorn 1
88
- Hoogelaandsters-2322-MoanMorn 1
89
- Hoogelaandsters-2326-MoanMorn 1
90
- Hoogelaandsters-2330-MoanMorn 1
91
- Hoogelaandsters-2334-MoanMorn 1
92
- Hoogelaandsters-2338-MoanMorn 1
93
- Hoogelaandsters-2342-MoanMorn 1
94
- Hoogelaandsters-2346-MoanMorn 1
95
- Hoogelaandsters-2350-MoanMorn 1
96
- Hoogelaandsters-2354-MoanMorn 1
97
- Hoogelaandsters-2359-MoanMorn 1
98
- Hoogelaandsters-2363-MoanMorn 1
99
- Hoogelaandsters-2367-MoanMorn 1
100
- Hoogelaandsters-2371-MoanMorn 1
101
- Hoogelaandsters-2375-MoanMorn 1
102
- Hoogelaandsters-2379-MoanMorn 1
103
- Hoogelaandsters-2384-MoanMorn 1
104
- Hoogelaandsters-2388-MoanMorn 1
105
- Hoogelaandsters-2392-MoanMorn 1
106
- Hoogelaandsters-2397-MoanMorn 1
107
- Hoogelaandsters-2401-MoanMorn 1
108
- Hoogelaandsters-2405-MoanMorn 1
109
- Hoogelaandsters-2409-MoanMorn 1
110
- Hoogelaandsters-2413-MoanMorn 1
111
- Hoogelaandsters-2420-MoanMorn 1
112
- Hoogelaandsters-2424-MoanMorn 1
113
- Hoogelaandsters-2429-MoanMorn 1
114
- Hoogelaandsters-2433-MoanMorn 1
115
- Hoogelaandsters-2438-MoanMorn 1
116
- Hoogelaandsters-2442-MoanMorn 1
117
- Hoogelaandsters-2446-MoanMorn 1
118
- Hoogelaandsters-2450-MoanMorn 1
119
- Hoogelaandsters-2454-MoanMorn 1
120
- Hoogelaandsters-2459-MoanMorn 1
121
- Hoogelaandsters-2293-MoanMorn 1
122
- Hoogelaandsters-2298-MoanMorn 1
123
- Hoogelaandsters-2302-MoanMorn 1
124
- Hoogelaandsters-2306-MoanMorn 1
125
- Hoogelaandsters-2310-MoanMorn 1
126
- Hoogelaandsters-2315-MoanMorn 1
127
- Hoogelaandsters-2319-MoanMorn 1
128
- Hoogelaandsters-2323-MoanMorn 1
129
- Hoogelaandsters-2327-MoanMorn 1
130
- Hoogelaandsters-2331-MoanMorn 1
131
- Hoogelaandsters-2335-MoanMorn 1
132
- Hoogelaandsters-2339-MoanMorn 1
133
- Hoogelaandsters-2343-MoanMorn 1
134
- Hoogelaandsters-2347-MoanMorn 1
135
- Hoogelaandsters-2351-MoanMorn 1
136
- Hoogelaandsters-2355-MoanMorn 1
137
- Hoogelaandsters-2360-MoanMorn 1
138
- Hoogelaandsters-2364-MoanMorn 1
139
- Hoogelaandsters-2368-MoanMorn 1
140
- Hoogelaandsters-2372-MoanMorn 1
141
- Hoogelaandsters-2376-MoanMorn 1
142
- Hoogelaandsters-2380-MoanMorn 1
143
- Hoogelaandsters-2385-MoanMorn 1
144
- Hoogelaandsters-2389-MoanMorn 1
145
- Hoogelaandsters-2393-MoanMorn 1
146
- Hoogelaandsters-2398-MoanMorn 1
147
- Hoogelaandsters-2402-MoanMorn 1
148
- Hoogelaandsters-2406-MoanMorn 1
149
- Hoogelaandsters-2410-MoanMorn 1
150
- Hoogelaandsters-2415-MoanMorn 1
151
- Hoogelaandsters-2421-MoanMorn 1
152
- Hoogelaandsters-2425-MoanMorn 1
153
- Hoogelaandsters-2430-MoanMorn 1
154
- Hoogelaandsters-2434-MoanMorn 1
155
- Hoogelaandsters-2439-MoanMorn 1
156
- Hoogelaandsters-2443-MoanMorn 1
157
- Hoogelaandsters-2447-MoanMorn 1
158
- Hoogelaandsters-2451-MoanMorn 1
159
- Hoogelaandsters-2455-MoanMorn 1
160
- Hoogelaandsters-2460-MoanMorn 1
161
- Hoogelaandsters-2461-MoanMorn 1
162
- Hoogelaandsters-2465-MoanMorn 1
163
- Hoogelaandsters-2469-MoanMorn 1
164
- Hoogelaandsters-2473-MoanMorn 1
165
- Hoogelaandsters-2477-MoanMorn 1
166
- Hoogelaandsters-2481-MoanMorn 1
167
- Hoogelaandsters-2486-MoanMorn 1
168
- Hoogelaandsters-2490-MoanMorn 1
169
- Hoogelaandsters-2494-MoanMorn 1
170
- Hoogelaandsters-2499-MoanMorn 1
171
- Hoogelaandsters-2503-MoanMorn 1
172
- Hoogelaandsters-2507-MoanMorn 1
173
- Hoogelaandsters-2511-MoanMorn 1
174
- Hoogelaandsters-2515-MoanMorn 1
175
- Hoogelaandsters-2519-MoanMorn 1
176
- Hoogelaandsters-2523-MoanMorn 1
177
- Hoogelaandsters-2527-MoanMorn 1
178
- Hoogelaandsters-2531-MoanMorn 1
179
- Hoogelaandsters-2535-MoanMorn 1
180
- Hoogelaandsters-2540-MoanMorn 1
181
- Hoogelaandsters-2544-MoanMorn 1
182
- Hoogelaandsters-2548-MoanMorn 1
183
- Hoogelaandsters-2552-MoanMorn 1
184
- Hoogelaandsters-2462-MoanMorn 1
185
- Hoogelaandsters-2466-MoanMorn 1
186
- Hoogelaandsters-2470-MoanMorn 1
187
- Hoogelaandsters-2474-MoanMorn 1
188
- Hoogelaandsters-2478-MoanMorn 1
189
- Hoogelaandsters-2482-MoanMorn 1
190
- Hoogelaandsters-2487-MoanMorn 1
191
- Hoogelaandsters-2491-MoanMorn 1
192
- Hoogelaandsters-2495-MoanMorn 1
193
- Hoogelaandsters-2500-MoanMorn 1
194
- Hoogelaandsters-2504-MoanMorn 1
195
- Hoogelaandsters-2508-MoanMorn 1
196
- Hoogelaandsters-2512-MoanMorn 1
197
- Hoogelaandsters-2516-MoanMorn 1
198
- Hoogelaandsters-2520-MoanMorn 1
199
- Hoogelaandsters-2524-MoanMorn 1
200
- Hoogelaandsters-2528-MoanMorn 1
201
- Hoogelaandsters-2532-MoanMorn 1
202
- Hoogelaandsters-2537-MoanMorn 1
203
- Hoogelaandsters-2541-MoanMorn 1
204
- Hoogelaandsters-2545-MoanMorn 1
205
- Hoogelaandsters-2549-MoanMorn 1
206
- Hoogelaandsters-2463-MoanMorn 1
207
- Hoogelaandsters-2467-MoanMorn 1
208
- Hoogelaandsters-2471-MoanMorn 1
209
- Hoogelaandsters-2475-MoanMorn 1
210
- Hoogelaandsters-2479-MoanMorn 1
211
- Hoogelaandsters-2483-MoanMorn 1
212
- Hoogelaandsters-2488-MoanMorn 1
213
- Hoogelaandsters-2492-MoanMorn 1
214
- Hoogelaandsters-2497-MoanMorn 1
215
- Hoogelaandsters-2501-MoanMorn 1
216
- Hoogelaandsters-2505-MoanMorn 1
217
- Hoogelaandsters-2509-MoanMorn 1
218
- Hoogelaandsters-2513-MoanMorn 1
219
- Hoogelaandsters-2517-MoanMorn 1
220
- Hoogelaandsters-2521-MoanMorn 1
221
- Hoogelaandsters-2525-MoanMorn 1
222
- Hoogelaandsters-2529-MoanMorn 1
223
- Hoogelaandsters-2533-MoanMorn 1
224
- Hoogelaandsters-2538-MoanMorn 1
225
- Hoogelaandsters-2542-MoanMorn 1
226
- Hoogelaandsters-2546-MoanMorn 1
227
- Hoogelaandsters-2550-MoanMorn 1
228
- Hoogelaandsters-2464-MoanMorn 1
229
- Hoogelaandsters-2468-MoanMorn 1
230
- Hoogelaandsters-2472-MoanMorn 1
231
- Hoogelaandsters-2476-MoanMorn 1
232
- Hoogelaandsters-2480-MoanMorn 1
233
- Hoogelaandsters-2485-MoanMorn 1
234
- Hoogelaandsters-2489-MoanMorn 1
235
- Hoogelaandsters-2493-MoanMorn 1
236
- Hoogelaandsters-2498-MoanMorn 1
237
- Hoogelaandsters-2502-MoanMorn 1
238
- Hoogelaandsters-2506-MoanMorn 1
239
- Hoogelaandsters-2510-MoanMorn 1
240
- Hoogelaandsters-2514-MoanMorn 1
241
- Hoogelaandsters-2518-MoanMorn 1
242
- Hoogelaandsters-2522-MoanMorn 1
243
- Hoogelaandsters-2526-MoanMorn 1
244
- Hoogelaandsters-2530-MoanMorn 1
245
- Hoogelaandsters-2534-MoanMorn 1
246
- Hoogelaandsters-2539-MoanMorn 1
247
- Hoogelaandsters-2543-MoanMorn 1
248
- Hoogelaandsters-2547-MoanMorn 1
249
- Hoogelaandsters-2551-MoanMorn 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/speech_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-2288-MoanMorn 117445
2
- Hoogelaandsters-2294-MoanMorn 152790
3
- Hoogelaandsters-2299-MoanMorn 95858
4
- Hoogelaandsters-2303-MoanMorn 79027
5
- Hoogelaandsters-2307-MoanMorn 97165
6
- Hoogelaandsters-2311-MoanMorn 77335
7
- Hoogelaandsters-2316-MoanMorn 115718
8
- Hoogelaandsters-2320-MoanMorn 256717
9
- Hoogelaandsters-2324-MoanMorn 62264
10
- Hoogelaandsters-2328-MoanMorn 162365
11
- Hoogelaandsters-2332-MoanMorn 244260
12
- Hoogelaandsters-2336-MoanMorn 85700
13
- Hoogelaandsters-2340-MoanMorn 64222
14
- Hoogelaandsters-2344-MoanMorn 52887
15
- Hoogelaandsters-2348-MoanMorn 167328
16
- Hoogelaandsters-2352-MoanMorn 135077
17
- Hoogelaandsters-2356-MoanMorn 134774
18
- Hoogelaandsters-2361-MoanMorn 74095
19
- Hoogelaandsters-2365-MoanMorn 90694
20
- Hoogelaandsters-2369-MoanMorn 187706
21
- Hoogelaandsters-2373-MoanMorn 204919
22
- Hoogelaandsters-2377-MoanMorn 98649
23
- Hoogelaandsters-2381-MoanMorn 128977
24
- Hoogelaandsters-2386-MoanMorn 90664
25
- Hoogelaandsters-2390-MoanMorn 323639
26
- Hoogelaandsters-2395-MoanMorn 61014
27
- Hoogelaandsters-2399-MoanMorn 270015
28
- Hoogelaandsters-2403-MoanMorn 80986
29
- Hoogelaandsters-2407-MoanMorn 120663
30
- Hoogelaandsters-2411-MoanMorn 101842
31
- Hoogelaandsters-2418-MoanMorn 172321
32
- Hoogelaandsters-2422-MoanMorn 119205
33
- Hoogelaandsters-2426-MoanMorn 41493
34
- Hoogelaandsters-2431-MoanMorn 155997
35
- Hoogelaandsters-2435-MoanMorn 149437
36
- Hoogelaandsters-2440-MoanMorn 106718
37
- Hoogelaandsters-2444-MoanMorn 223892
38
- Hoogelaandsters-2448-MoanMorn 111220
39
- Hoogelaandsters-2452-MoanMorn 210485
40
- Hoogelaandsters-2456-MoanMorn 235422
41
- Hoogelaandsters-2289-MoanMorn 270050
42
- Hoogelaandsters-2295-MoanMorn 64915
43
- Hoogelaandsters-2300-MoanMorn 234710
44
- Hoogelaandsters-2304-MoanMorn 281339
45
- Hoogelaandsters-2308-MoanMorn 51878
46
- Hoogelaandsters-2312-MoanMorn 76205
47
- Hoogelaandsters-2317-MoanMorn 256841
48
- Hoogelaandsters-2321-MoanMorn 109919
49
- Hoogelaandsters-2325-MoanMorn 56870
50
- Hoogelaandsters-2329-MoanMorn 56448
51
- Hoogelaandsters-2333-MoanMorn 114522
52
- Hoogelaandsters-2337-MoanMorn 69593
53
- Hoogelaandsters-2341-MoanMorn 124803
54
- Hoogelaandsters-2345-MoanMorn 103242
55
- Hoogelaandsters-2349-MoanMorn 193651
56
- Hoogelaandsters-2353-MoanMorn 81114
57
- Hoogelaandsters-2358-MoanMorn 95962
58
- Hoogelaandsters-2362-MoanMorn 196092
59
- Hoogelaandsters-2366-MoanMorn 124712
60
- Hoogelaandsters-2370-MoanMorn 90317
61
- Hoogelaandsters-2374-MoanMorn 147754
62
- Hoogelaandsters-2378-MoanMorn 237031
63
- Hoogelaandsters-2382-MoanMorn 143942
64
- Hoogelaandsters-2387-MoanMorn 56448
65
- Hoogelaandsters-2391-MoanMorn 69609
66
- Hoogelaandsters-2396-MoanMorn 99452
67
- Hoogelaandsters-2400-MoanMorn 78535
68
- Hoogelaandsters-2404-MoanMorn 130726
69
- Hoogelaandsters-2408-MoanMorn 193877
70
- Hoogelaandsters-2412-MoanMorn 53626
71
- Hoogelaandsters-2419-MoanMorn 316671
72
- Hoogelaandsters-2423-MoanMorn 117655
73
- Hoogelaandsters-2427-MoanMorn 319451
74
- Hoogelaandsters-2432-MoanMorn 162733
75
- Hoogelaandsters-2437-MoanMorn 107702
76
- Hoogelaandsters-2441-MoanMorn 53626
77
- Hoogelaandsters-2445-MoanMorn 115718
78
- Hoogelaandsters-2449-MoanMorn 217179
79
- Hoogelaandsters-2453-MoanMorn 66767
80
- Hoogelaandsters-2457-MoanMorn 255710
81
- Hoogelaandsters-2291-MoanMorn 31046
82
- Hoogelaandsters-2297-MoanMorn 377978
83
- Hoogelaandsters-2301-MoanMorn 141324
84
- Hoogelaandsters-2305-MoanMorn 84672
85
- Hoogelaandsters-2309-MoanMorn 79787
86
- Hoogelaandsters-2314-MoanMorn 75773
87
- Hoogelaandsters-2318-MoanMorn 160174
88
- Hoogelaandsters-2322-MoanMorn 132835
89
- Hoogelaandsters-2326-MoanMorn 101606
90
- Hoogelaandsters-2330-MoanMorn 135933
91
- Hoogelaandsters-2334-MoanMorn 78025
92
- Hoogelaandsters-2338-MoanMorn 129449
93
- Hoogelaandsters-2342-MoanMorn 38856
94
- Hoogelaandsters-2346-MoanMorn 174562
95
- Hoogelaandsters-2350-MoanMorn 128445
96
- Hoogelaandsters-2354-MoanMorn 160490
97
- Hoogelaandsters-2359-MoanMorn 228493
98
- Hoogelaandsters-2363-MoanMorn 98433
99
- Hoogelaandsters-2367-MoanMorn 56448
100
- Hoogelaandsters-2371-MoanMorn 77172
101
- Hoogelaandsters-2375-MoanMorn 126690
102
- Hoogelaandsters-2379-MoanMorn 166001
103
- Hoogelaandsters-2384-MoanMorn 414643
104
- Hoogelaandsters-2388-MoanMorn 135592
105
- Hoogelaandsters-2392-MoanMorn 199799
106
- Hoogelaandsters-2397-MoanMorn 35135
107
- Hoogelaandsters-2401-MoanMorn 189656
108
- Hoogelaandsters-2405-MoanMorn 280393
109
- Hoogelaandsters-2409-MoanMorn 222797
110
- Hoogelaandsters-2413-MoanMorn 244597
111
- Hoogelaandsters-2420-MoanMorn 112072
112
- Hoogelaandsters-2424-MoanMorn 65163
113
- Hoogelaandsters-2429-MoanMorn 73382
114
- Hoogelaandsters-2433-MoanMorn 144641
115
- Hoogelaandsters-2438-MoanMorn 132752
116
- Hoogelaandsters-2442-MoanMorn 81602
117
- Hoogelaandsters-2446-MoanMorn 178999
118
- Hoogelaandsters-2450-MoanMorn 139858
119
- Hoogelaandsters-2454-MoanMorn 297187
120
- Hoogelaandsters-2459-MoanMorn 348416
121
- Hoogelaandsters-2293-MoanMorn 173015
122
- Hoogelaandsters-2298-MoanMorn 114482
123
- Hoogelaandsters-2302-MoanMorn 129931
124
- Hoogelaandsters-2306-MoanMorn 59714
125
- Hoogelaandsters-2310-MoanMorn 98201
126
- Hoogelaandsters-2315-MoanMorn 365776
127
- Hoogelaandsters-2319-MoanMorn 118998
128
- Hoogelaandsters-2323-MoanMorn 133528
129
- Hoogelaandsters-2327-MoanMorn 137558
130
- Hoogelaandsters-2331-MoanMorn 30210
131
- Hoogelaandsters-2335-MoanMorn 147860
132
- Hoogelaandsters-2339-MoanMorn 162823
133
- Hoogelaandsters-2343-MoanMorn 124110
134
- Hoogelaandsters-2347-MoanMorn 122295
135
- Hoogelaandsters-2351-MoanMorn 69605
136
- Hoogelaandsters-2355-MoanMorn 117925
137
- Hoogelaandsters-2360-MoanMorn 79325
138
- Hoogelaandsters-2364-MoanMorn 155437
139
- Hoogelaandsters-2368-MoanMorn 347030
140
- Hoogelaandsters-2372-MoanMorn 116698
141
- Hoogelaandsters-2376-MoanMorn 127143
142
- Hoogelaandsters-2380-MoanMorn 214235
143
- Hoogelaandsters-2385-MoanMorn 114241
144
- Hoogelaandsters-2389-MoanMorn 136878
145
- Hoogelaandsters-2393-MoanMorn 110685
146
- Hoogelaandsters-2398-MoanMorn 148220
147
- Hoogelaandsters-2402-MoanMorn 189724
148
- Hoogelaandsters-2406-MoanMorn 142468
149
- Hoogelaandsters-2410-MoanMorn 80968
150
- Hoogelaandsters-2415-MoanMorn 92540
151
- Hoogelaandsters-2421-MoanMorn 117050
152
- Hoogelaandsters-2425-MoanMorn 91659
153
- Hoogelaandsters-2430-MoanMorn 109139
154
- Hoogelaandsters-2434-MoanMorn 150905
155
- Hoogelaandsters-2439-MoanMorn 89319
156
- Hoogelaandsters-2443-MoanMorn 151690
157
- Hoogelaandsters-2447-MoanMorn 191388
158
- Hoogelaandsters-2451-MoanMorn 256575
159
- Hoogelaandsters-2455-MoanMorn 143121
160
- Hoogelaandsters-2460-MoanMorn 148964
161
- Hoogelaandsters-2461-MoanMorn 202459
162
- Hoogelaandsters-2465-MoanMorn 77353
163
- Hoogelaandsters-2469-MoanMorn 202205
164
- Hoogelaandsters-2473-MoanMorn 144494
165
- Hoogelaandsters-2477-MoanMorn 261203
166
- Hoogelaandsters-2481-MoanMorn 55523
167
- Hoogelaandsters-2486-MoanMorn 154579
168
- Hoogelaandsters-2490-MoanMorn 189951
169
- Hoogelaandsters-2494-MoanMorn 322452
170
- Hoogelaandsters-2499-MoanMorn 80295
171
- Hoogelaandsters-2503-MoanMorn 49367
172
- Hoogelaandsters-2507-MoanMorn 150136
173
- Hoogelaandsters-2511-MoanMorn 154426
174
- Hoogelaandsters-2515-MoanMorn 72940
175
- Hoogelaandsters-2519-MoanMorn 117693
176
- Hoogelaandsters-2523-MoanMorn 65506
177
- Hoogelaandsters-2527-MoanMorn 186486
178
- Hoogelaandsters-2531-MoanMorn 66332
179
- Hoogelaandsters-2535-MoanMorn 59270
180
- Hoogelaandsters-2540-MoanMorn 92545
181
- Hoogelaandsters-2544-MoanMorn 74985
182
- Hoogelaandsters-2548-MoanMorn 187809
183
- Hoogelaandsters-2552-MoanMorn 165658
184
- Hoogelaandsters-2462-MoanMorn 151515
185
- Hoogelaandsters-2466-MoanMorn 75367
186
- Hoogelaandsters-2470-MoanMorn 59270
187
- Hoogelaandsters-2474-MoanMorn 76195
188
- Hoogelaandsters-2478-MoanMorn 97331
189
- Hoogelaandsters-2482-MoanMorn 55406
190
- Hoogelaandsters-2487-MoanMorn 267130
191
- Hoogelaandsters-2491-MoanMorn 197159
192
- Hoogelaandsters-2495-MoanMorn 115714
193
- Hoogelaandsters-2500-MoanMorn 115155
194
- Hoogelaandsters-2504-MoanMorn 122315
195
- Hoogelaandsters-2508-MoanMorn 173640
196
- Hoogelaandsters-2512-MoanMorn 387092
197
- Hoogelaandsters-2516-MoanMorn 56448
198
- Hoogelaandsters-2520-MoanMorn 271221
199
- Hoogelaandsters-2524-MoanMorn 206196
200
- Hoogelaandsters-2528-MoanMorn 216440
201
- Hoogelaandsters-2532-MoanMorn 102710
202
- Hoogelaandsters-2537-MoanMorn 426213
203
- Hoogelaandsters-2541-MoanMorn 119456
204
- Hoogelaandsters-2545-MoanMorn 107043
205
- Hoogelaandsters-2549-MoanMorn 164150
206
- Hoogelaandsters-2463-MoanMorn 50962
207
- Hoogelaandsters-2467-MoanMorn 122984
208
- Hoogelaandsters-2471-MoanMorn 73212
209
- Hoogelaandsters-2475-MoanMorn 71040
210
- Hoogelaandsters-2479-MoanMorn 79027
211
- Hoogelaandsters-2483-MoanMorn 33869
212
- Hoogelaandsters-2488-MoanMorn 61981
213
- Hoogelaandsters-2492-MoanMorn 341502
214
- Hoogelaandsters-2497-MoanMorn 150345
215
- Hoogelaandsters-2501-MoanMorn 134396
216
- Hoogelaandsters-2505-MoanMorn 151752
217
- Hoogelaandsters-2509-MoanMorn 127099
218
- Hoogelaandsters-2513-MoanMorn 124822
219
- Hoogelaandsters-2517-MoanMorn 90045
220
- Hoogelaandsters-2521-MoanMorn 73110
221
- Hoogelaandsters-2525-MoanMorn 70279
222
- Hoogelaandsters-2529-MoanMorn 188522
223
- Hoogelaandsters-2533-MoanMorn 158311
224
- Hoogelaandsters-2538-MoanMorn 136310
225
- Hoogelaandsters-2542-MoanMorn 253487
226
- Hoogelaandsters-2546-MoanMorn 67738
227
- Hoogelaandsters-2550-MoanMorn 115291
228
- Hoogelaandsters-2464-MoanMorn 93139
229
- Hoogelaandsters-2468-MoanMorn 244448
230
- Hoogelaandsters-2472-MoanMorn 111901
231
- Hoogelaandsters-2476-MoanMorn 129484
232
- Hoogelaandsters-2480-MoanMorn 122230
233
- Hoogelaandsters-2485-MoanMorn 63917
234
- Hoogelaandsters-2489-MoanMorn 146083
235
- Hoogelaandsters-2493-MoanMorn 79573
236
- Hoogelaandsters-2498-MoanMorn 116144
237
- Hoogelaandsters-2502-MoanMorn 126772
238
- Hoogelaandsters-2506-MoanMorn 84358
239
- Hoogelaandsters-2510-MoanMorn 258288
240
- Hoogelaandsters-2514-MoanMorn 218302
241
- Hoogelaandsters-2518-MoanMorn 387837
242
- Hoogelaandsters-2522-MoanMorn 67878
243
- Hoogelaandsters-2526-MoanMorn 139164
244
- Hoogelaandsters-2530-MoanMorn 150036
245
- Hoogelaandsters-2534-MoanMorn 163176
246
- Hoogelaandsters-2539-MoanMorn 115911
247
- Hoogelaandsters-2543-MoanMorn 387694
248
- Hoogelaandsters-2547-MoanMorn 160101
249
- Hoogelaandsters-2551-MoanMorn 178543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/stats_keys DELETED
@@ -1,2 +0,0 @@
1
- feats
2
- feats_lengths
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/train/text_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-2288-MoanMorn 68
2
- Hoogelaandsters-2294-MoanMorn 95
3
- Hoogelaandsters-2299-MoanMorn 42
4
- Hoogelaandsters-2303-MoanMorn 54
5
- Hoogelaandsters-2307-MoanMorn 55
6
- Hoogelaandsters-2311-MoanMorn 42
7
- Hoogelaandsters-2316-MoanMorn 81
8
- Hoogelaandsters-2320-MoanMorn 149
9
- Hoogelaandsters-2324-MoanMorn 41
10
- Hoogelaandsters-2328-MoanMorn 104
11
- Hoogelaandsters-2332-MoanMorn 162
12
- Hoogelaandsters-2336-MoanMorn 53
13
- Hoogelaandsters-2340-MoanMorn 34
14
- Hoogelaandsters-2344-MoanMorn 23
15
- Hoogelaandsters-2348-MoanMorn 98
16
- Hoogelaandsters-2352-MoanMorn 90
17
- Hoogelaandsters-2356-MoanMorn 86
18
- Hoogelaandsters-2361-MoanMorn 51
19
- Hoogelaandsters-2365-MoanMorn 45
20
- Hoogelaandsters-2369-MoanMorn 119
21
- Hoogelaandsters-2373-MoanMorn 129
22
- Hoogelaandsters-2377-MoanMorn 50
23
- Hoogelaandsters-2381-MoanMorn 74
24
- Hoogelaandsters-2386-MoanMorn 57
25
- Hoogelaandsters-2390-MoanMorn 185
26
- Hoogelaandsters-2395-MoanMorn 30
27
- Hoogelaandsters-2399-MoanMorn 178
28
- Hoogelaandsters-2403-MoanMorn 55
29
- Hoogelaandsters-2407-MoanMorn 62
30
- Hoogelaandsters-2411-MoanMorn 52
31
- Hoogelaandsters-2418-MoanMorn 103
32
- Hoogelaandsters-2422-MoanMorn 63
33
- Hoogelaandsters-2426-MoanMorn 21
34
- Hoogelaandsters-2431-MoanMorn 90
35
- Hoogelaandsters-2435-MoanMorn 97
36
- Hoogelaandsters-2440-MoanMorn 63
37
- Hoogelaandsters-2444-MoanMorn 144
38
- Hoogelaandsters-2448-MoanMorn 62
39
- Hoogelaandsters-2452-MoanMorn 128
40
- Hoogelaandsters-2456-MoanMorn 165
41
- Hoogelaandsters-2289-MoanMorn 177
42
- Hoogelaandsters-2295-MoanMorn 45
43
- Hoogelaandsters-2300-MoanMorn 137
44
- Hoogelaandsters-2304-MoanMorn 175
45
- Hoogelaandsters-2308-MoanMorn 27
46
- Hoogelaandsters-2312-MoanMorn 48
47
- Hoogelaandsters-2317-MoanMorn 133
48
- Hoogelaandsters-2321-MoanMorn 73
49
- Hoogelaandsters-2325-MoanMorn 48
50
- Hoogelaandsters-2329-MoanMorn 31
51
- Hoogelaandsters-2333-MoanMorn 74
52
- Hoogelaandsters-2337-MoanMorn 32
53
- Hoogelaandsters-2341-MoanMorn 80
54
- Hoogelaandsters-2345-MoanMorn 62
55
- Hoogelaandsters-2349-MoanMorn 104
56
- Hoogelaandsters-2353-MoanMorn 45
57
- Hoogelaandsters-2358-MoanMorn 46
58
- Hoogelaandsters-2362-MoanMorn 119
59
- Hoogelaandsters-2366-MoanMorn 69
60
- Hoogelaandsters-2370-MoanMorn 69
61
- Hoogelaandsters-2374-MoanMorn 84
62
- Hoogelaandsters-2378-MoanMorn 134
63
- Hoogelaandsters-2382-MoanMorn 93
64
- Hoogelaandsters-2387-MoanMorn 48
65
- Hoogelaandsters-2391-MoanMorn 34
66
- Hoogelaandsters-2396-MoanMorn 51
67
- Hoogelaandsters-2400-MoanMorn 44
68
- Hoogelaandsters-2404-MoanMorn 75
69
- Hoogelaandsters-2408-MoanMorn 112
70
- Hoogelaandsters-2412-MoanMorn 25
71
- Hoogelaandsters-2419-MoanMorn 186
72
- Hoogelaandsters-2423-MoanMorn 60
73
- Hoogelaandsters-2427-MoanMorn 210
74
- Hoogelaandsters-2432-MoanMorn 105
75
- Hoogelaandsters-2437-MoanMorn 65
76
- Hoogelaandsters-2441-MoanMorn 35
77
- Hoogelaandsters-2445-MoanMorn 76
78
- Hoogelaandsters-2449-MoanMorn 125
79
- Hoogelaandsters-2453-MoanMorn 33
80
- Hoogelaandsters-2457-MoanMorn 156
81
- Hoogelaandsters-2291-MoanMorn 22
82
- Hoogelaandsters-2297-MoanMorn 202
83
- Hoogelaandsters-2301-MoanMorn 75
84
- Hoogelaandsters-2305-MoanMorn 47
85
- Hoogelaandsters-2309-MoanMorn 43
86
- Hoogelaandsters-2314-MoanMorn 32
87
- Hoogelaandsters-2318-MoanMorn 101
88
- Hoogelaandsters-2322-MoanMorn 61
89
- Hoogelaandsters-2326-MoanMorn 63
90
- Hoogelaandsters-2330-MoanMorn 74
91
- Hoogelaandsters-2334-MoanMorn 50
92
- Hoogelaandsters-2338-MoanMorn 74
93
- Hoogelaandsters-2342-MoanMorn 24
94
- Hoogelaandsters-2346-MoanMorn 108
95
- Hoogelaandsters-2350-MoanMorn 82
96
- Hoogelaandsters-2354-MoanMorn 89
97
- Hoogelaandsters-2359-MoanMorn 141
98
- Hoogelaandsters-2363-MoanMorn 63
99
- Hoogelaandsters-2367-MoanMorn 36
100
- Hoogelaandsters-2371-MoanMorn 46
101
- Hoogelaandsters-2375-MoanMorn 89
102
- Hoogelaandsters-2379-MoanMorn 106
103
- Hoogelaandsters-2384-MoanMorn 301
104
- Hoogelaandsters-2388-MoanMorn 80
105
- Hoogelaandsters-2392-MoanMorn 110
106
- Hoogelaandsters-2397-MoanMorn 18
107
- Hoogelaandsters-2401-MoanMorn 102
108
- Hoogelaandsters-2405-MoanMorn 199
109
- Hoogelaandsters-2409-MoanMorn 103
110
- Hoogelaandsters-2413-MoanMorn 147
111
- Hoogelaandsters-2420-MoanMorn 65
112
- Hoogelaandsters-2424-MoanMorn 38
113
- Hoogelaandsters-2429-MoanMorn 57
114
- Hoogelaandsters-2433-MoanMorn 75
115
- Hoogelaandsters-2438-MoanMorn 76
116
- Hoogelaandsters-2442-MoanMorn 42
117
- Hoogelaandsters-2446-MoanMorn 106
118
- Hoogelaandsters-2450-MoanMorn 97
119
- Hoogelaandsters-2454-MoanMorn 183
120
- Hoogelaandsters-2459-MoanMorn 224
121
- Hoogelaandsters-2293-MoanMorn 88
122
- Hoogelaandsters-2298-MoanMorn 65
123
- Hoogelaandsters-2302-MoanMorn 86
124
- Hoogelaandsters-2306-MoanMorn 29
125
- Hoogelaandsters-2310-MoanMorn 66
126
- Hoogelaandsters-2315-MoanMorn 210
127
- Hoogelaandsters-2319-MoanMorn 78
128
- Hoogelaandsters-2323-MoanMorn 71
129
- Hoogelaandsters-2327-MoanMorn 86
130
- Hoogelaandsters-2331-MoanMorn 21
131
- Hoogelaandsters-2335-MoanMorn 91
132
- Hoogelaandsters-2339-MoanMorn 109
133
- Hoogelaandsters-2343-MoanMorn 65
134
- Hoogelaandsters-2347-MoanMorn 70
135
- Hoogelaandsters-2351-MoanMorn 42
136
- Hoogelaandsters-2355-MoanMorn 64
137
- Hoogelaandsters-2360-MoanMorn 50
138
- Hoogelaandsters-2364-MoanMorn 267
139
- Hoogelaandsters-2368-MoanMorn 216
140
- Hoogelaandsters-2372-MoanMorn 66
141
- Hoogelaandsters-2376-MoanMorn 68
142
- Hoogelaandsters-2380-MoanMorn 136
143
- Hoogelaandsters-2385-MoanMorn 61
144
- Hoogelaandsters-2389-MoanMorn 82
145
- Hoogelaandsters-2393-MoanMorn 82
146
- Hoogelaandsters-2398-MoanMorn 97
147
- Hoogelaandsters-2402-MoanMorn 135
148
- Hoogelaandsters-2406-MoanMorn 80
149
- Hoogelaandsters-2410-MoanMorn 48
150
- Hoogelaandsters-2415-MoanMorn 69
151
- Hoogelaandsters-2421-MoanMorn 64
152
- Hoogelaandsters-2425-MoanMorn 50
153
- Hoogelaandsters-2430-MoanMorn 60
154
- Hoogelaandsters-2434-MoanMorn 94
155
- Hoogelaandsters-2439-MoanMorn 56
156
- Hoogelaandsters-2443-MoanMorn 80
157
- Hoogelaandsters-2447-MoanMorn 130
158
- Hoogelaandsters-2451-MoanMorn 163
159
- Hoogelaandsters-2455-MoanMorn 84
160
- Hoogelaandsters-2460-MoanMorn 97
161
- Hoogelaandsters-2461-MoanMorn 126
162
- Hoogelaandsters-2465-MoanMorn 36
163
- Hoogelaandsters-2469-MoanMorn 117
164
- Hoogelaandsters-2473-MoanMorn 79
165
- Hoogelaandsters-2477-MoanMorn 171
166
- Hoogelaandsters-2481-MoanMorn 30
167
- Hoogelaandsters-2486-MoanMorn 84
168
- Hoogelaandsters-2490-MoanMorn 135
169
- Hoogelaandsters-2494-MoanMorn 164
170
- Hoogelaandsters-2499-MoanMorn 53
171
- Hoogelaandsters-2503-MoanMorn 16
172
- Hoogelaandsters-2507-MoanMorn 82
173
- Hoogelaandsters-2511-MoanMorn 91
174
- Hoogelaandsters-2515-MoanMorn 49
175
- Hoogelaandsters-2519-MoanMorn 70
176
- Hoogelaandsters-2523-MoanMorn 37
177
- Hoogelaandsters-2527-MoanMorn 116
178
- Hoogelaandsters-2531-MoanMorn 32
179
- Hoogelaandsters-2535-MoanMorn 43
180
- Hoogelaandsters-2540-MoanMorn 74
181
- Hoogelaandsters-2544-MoanMorn 49
182
- Hoogelaandsters-2548-MoanMorn 121
183
- Hoogelaandsters-2552-MoanMorn 92
184
- Hoogelaandsters-2462-MoanMorn 103
185
- Hoogelaandsters-2466-MoanMorn 38
186
- Hoogelaandsters-2470-MoanMorn 24
187
- Hoogelaandsters-2474-MoanMorn 33
188
- Hoogelaandsters-2478-MoanMorn 43
189
- Hoogelaandsters-2482-MoanMorn 18
190
- Hoogelaandsters-2487-MoanMorn 175
191
- Hoogelaandsters-2491-MoanMorn 115
192
- Hoogelaandsters-2495-MoanMorn 77
193
- Hoogelaandsters-2500-MoanMorn 66
194
- Hoogelaandsters-2504-MoanMorn 65
195
- Hoogelaandsters-2508-MoanMorn 91
196
- Hoogelaandsters-2512-MoanMorn 230
197
- Hoogelaandsters-2516-MoanMorn 40
198
- Hoogelaandsters-2520-MoanMorn 168
199
- Hoogelaandsters-2524-MoanMorn 134
200
- Hoogelaandsters-2528-MoanMorn 154
201
- Hoogelaandsters-2532-MoanMorn 60
202
- Hoogelaandsters-2537-MoanMorn 292
203
- Hoogelaandsters-2541-MoanMorn 71
204
- Hoogelaandsters-2545-MoanMorn 62
205
- Hoogelaandsters-2549-MoanMorn 123
206
- Hoogelaandsters-2463-MoanMorn 250
207
- Hoogelaandsters-2467-MoanMorn 63
208
- Hoogelaandsters-2471-MoanMorn 51
209
- Hoogelaandsters-2475-MoanMorn 59
210
- Hoogelaandsters-2479-MoanMorn 47
211
- Hoogelaandsters-2483-MoanMorn 9
212
- Hoogelaandsters-2488-MoanMorn 33
213
- Hoogelaandsters-2492-MoanMorn 173
214
- Hoogelaandsters-2497-MoanMorn 95
215
- Hoogelaandsters-2501-MoanMorn 66
216
- Hoogelaandsters-2505-MoanMorn 89
217
- Hoogelaandsters-2509-MoanMorn 82
218
- Hoogelaandsters-2513-MoanMorn 68
219
- Hoogelaandsters-2517-MoanMorn 54
220
- Hoogelaandsters-2521-MoanMorn 44
221
- Hoogelaandsters-2525-MoanMorn 39
222
- Hoogelaandsters-2529-MoanMorn 116
223
- Hoogelaandsters-2533-MoanMorn 106
224
- Hoogelaandsters-2538-MoanMorn 71
225
- Hoogelaandsters-2542-MoanMorn 162
226
- Hoogelaandsters-2546-MoanMorn 63
227
- Hoogelaandsters-2550-MoanMorn 56
228
- Hoogelaandsters-2464-MoanMorn 59
229
- Hoogelaandsters-2468-MoanMorn 146
230
- Hoogelaandsters-2472-MoanMorn 76
231
- Hoogelaandsters-2476-MoanMorn 74
232
- Hoogelaandsters-2480-MoanMorn 76
233
- Hoogelaandsters-2485-MoanMorn 49
234
- Hoogelaandsters-2489-MoanMorn 90
235
- Hoogelaandsters-2493-MoanMorn 36
236
- Hoogelaandsters-2498-MoanMorn 67
237
- Hoogelaandsters-2502-MoanMorn 71
238
- Hoogelaandsters-2506-MoanMorn 44
239
- Hoogelaandsters-2510-MoanMorn 134
240
- Hoogelaandsters-2514-MoanMorn 139
241
- Hoogelaandsters-2518-MoanMorn 217
242
- Hoogelaandsters-2522-MoanMorn 44
243
- Hoogelaandsters-2526-MoanMorn 102
244
- Hoogelaandsters-2530-MoanMorn 96
245
- Hoogelaandsters-2534-MoanMorn 92
246
- Hoogelaandsters-2539-MoanMorn 86
247
- Hoogelaandsters-2543-MoanMorn 249
248
- Hoogelaandsters-2547-MoanMorn 102
249
- Hoogelaandsters-2551-MoanMorn 97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/batch_keys DELETED
@@ -1,3 +0,0 @@
1
- text
2
- speech
3
- sids
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/feats_lengths_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:15ffdfea3d7f1876d6a022d37c05b82e4b1fce8aefbb321bd0cddd482ac406c8
3
- size 778
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/feats_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb9a150225e097511ae2eaa906b3c3fe455256237be5df75b824e8c2ae27d3e5
3
- size 1402
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/sids_shape DELETED
@@ -1,5 +0,0 @@
1
- Hoogelaandsters-3047-MoanMorn 1
2
- Hoogelaandsters-3322-OpBuus 1
3
- Hoogelaandsters-3115-MoanMorn 1
4
- Hoogelaandsters-3184-MoanMorn 1
5
- Hoogelaandsters-3251-MoanMorn 1
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/speech_shape DELETED
@@ -1,5 +0,0 @@
1
- Hoogelaandsters-3047-MoanMorn 152884
2
- Hoogelaandsters-3322-OpBuus 41382
3
- Hoogelaandsters-3115-MoanMorn 54997
4
- Hoogelaandsters-3184-MoanMorn 112079
5
- Hoogelaandsters-3251-MoanMorn 176275
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/stats_keys DELETED
@@ -1,2 +0,0 @@
1
- feats
2
- feats_lengths
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.10/valid/text_shape DELETED
@@ -1,5 +0,0 @@
1
- Hoogelaandsters-3047-MoanMorn 84
2
- Hoogelaandsters-3322-OpBuus 21
3
- Hoogelaandsters-3115-MoanMorn 32
4
- Hoogelaandsters-3184-MoanMorn 60
5
- Hoogelaandsters-3251-MoanMorn 113
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11.log DELETED
@@ -1,1152 +0,0 @@
1
- # python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.11.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.11.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
2
- # Started at Fri Dec 1 15:58:34 UTC 2023
3
- #
4
- /data2/p280965/tts/espnet/tools/venv/bin/python3 /data2/p280965/tts/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.11.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.11.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
5
- [wieling-3-a100] 2023-12-01 15:58:40,493 (gan_tts:293) INFO: Vocabulary size: 46
6
- [wieling-3-a100] 2023-12-01 15:58:40,627 (encoder:174) INFO: encoder self-attention layer type = relative self-attention
7
- /data2/p280965/tts/espnet/tools/venv/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
8
- warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
9
- /data2/p280965/tts/espnet/espnet2/gan_tts/vits/monotonic_align/__init__.py:19: UserWarning: Cython version is not available. Fallback to 'EXPERIMETAL' numba version. If you want to use the cython version, please build it as follows: `cd espnet2/gan_tts/vits/monotonic_align; python setup.py build_ext --inplace`
10
- warnings.warn(
11
- [wieling-3-a100] 2023-12-01 15:58:41,832 (abs_task:1268) INFO: pytorch.version=2.1.0+cu121, cuda.available=True, cudnn.version=8902, cudnn.benchmark=False, cudnn.deterministic=False
12
- [wieling-3-a100] 2023-12-01 15:58:41,847 (abs_task:1269) INFO: Model structure:
13
- ESPnetGANTTSModel(
14
- (feats_extract): LogMelFbank(
15
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
16
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=80, fmax=7600, htk=False)
17
- )
18
- (tts): VITS(
19
- (generator): VITSGenerator(
20
- (text_encoder): TextEncoder(
21
- (emb): Embedding(46, 192)
22
- (encoder): Encoder(
23
- (embed): Sequential(
24
- (0): RelPositionalEncoding(
25
- (dropout): Dropout(p=0.0, inplace=False)
26
- )
27
- )
28
- (encoders): MultiSequential(
29
- (0): EncoderLayer(
30
- (self_attn): RelPositionMultiHeadedAttention(
31
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
32
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
33
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
34
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
35
- (dropout): Dropout(p=0.1, inplace=False)
36
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
37
- )
38
- (feed_forward): MultiLayeredConv1d(
39
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
40
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
41
- (dropout): Dropout(p=0.1, inplace=False)
42
- )
43
- (feed_forward_macaron): MultiLayeredConv1d(
44
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
45
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
46
- (dropout): Dropout(p=0.1, inplace=False)
47
- )
48
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
49
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
50
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
51
- (dropout): Dropout(p=0.1, inplace=False)
52
- )
53
- (1): EncoderLayer(
54
- (self_attn): RelPositionMultiHeadedAttention(
55
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
56
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
57
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
58
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
59
- (dropout): Dropout(p=0.1, inplace=False)
60
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
61
- )
62
- (feed_forward): MultiLayeredConv1d(
63
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
64
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
65
- (dropout): Dropout(p=0.1, inplace=False)
66
- )
67
- (feed_forward_macaron): MultiLayeredConv1d(
68
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
69
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
70
- (dropout): Dropout(p=0.1, inplace=False)
71
- )
72
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
73
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
74
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
75
- (dropout): Dropout(p=0.1, inplace=False)
76
- )
77
- (2): EncoderLayer(
78
- (self_attn): RelPositionMultiHeadedAttention(
79
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
80
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
81
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
82
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
83
- (dropout): Dropout(p=0.1, inplace=False)
84
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
85
- )
86
- (feed_forward): MultiLayeredConv1d(
87
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
88
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
89
- (dropout): Dropout(p=0.1, inplace=False)
90
- )
91
- (feed_forward_macaron): MultiLayeredConv1d(
92
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
93
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
94
- (dropout): Dropout(p=0.1, inplace=False)
95
- )
96
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
97
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
98
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
99
- (dropout): Dropout(p=0.1, inplace=False)
100
- )
101
- (3): EncoderLayer(
102
- (self_attn): RelPositionMultiHeadedAttention(
103
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
104
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
105
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
106
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
107
- (dropout): Dropout(p=0.1, inplace=False)
108
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
109
- )
110
- (feed_forward): MultiLayeredConv1d(
111
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
112
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
113
- (dropout): Dropout(p=0.1, inplace=False)
114
- )
115
- (feed_forward_macaron): MultiLayeredConv1d(
116
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
117
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
118
- (dropout): Dropout(p=0.1, inplace=False)
119
- )
120
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
121
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
122
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
123
- (dropout): Dropout(p=0.1, inplace=False)
124
- )
125
- (4): EncoderLayer(
126
- (self_attn): RelPositionMultiHeadedAttention(
127
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
128
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
129
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
130
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
131
- (dropout): Dropout(p=0.1, inplace=False)
132
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
133
- )
134
- (feed_forward): MultiLayeredConv1d(
135
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
136
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
137
- (dropout): Dropout(p=0.1, inplace=False)
138
- )
139
- (feed_forward_macaron): MultiLayeredConv1d(
140
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
141
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
142
- (dropout): Dropout(p=0.1, inplace=False)
143
- )
144
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
145
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
146
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
147
- (dropout): Dropout(p=0.1, inplace=False)
148
- )
149
- (5): EncoderLayer(
150
- (self_attn): RelPositionMultiHeadedAttention(
151
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
152
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
153
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
154
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
155
- (dropout): Dropout(p=0.1, inplace=False)
156
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
157
- )
158
- (feed_forward): MultiLayeredConv1d(
159
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
160
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
161
- (dropout): Dropout(p=0.1, inplace=False)
162
- )
163
- (feed_forward_macaron): MultiLayeredConv1d(
164
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
165
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
166
- (dropout): Dropout(p=0.1, inplace=False)
167
- )
168
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
169
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
170
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
171
- (dropout): Dropout(p=0.1, inplace=False)
172
- )
173
- )
174
- (after_norm): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
175
- )
176
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
177
- )
178
- (decoder): HiFiGANGenerator(
179
- (input_conv): Conv1d(192, 512, kernel_size=(7,), stride=(1,), padding=(3,))
180
- (upsamples): ModuleList(
181
- (0): Sequential(
182
- (0): LeakyReLU(negative_slope=0.1)
183
- (1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
184
- )
185
- (1): Sequential(
186
- (0): LeakyReLU(negative_slope=0.1)
187
- (1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
188
- )
189
- (2): Sequential(
190
- (0): LeakyReLU(negative_slope=0.1)
191
- (1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
192
- )
193
- (3): Sequential(
194
- (0): LeakyReLU(negative_slope=0.1)
195
- (1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
196
- )
197
- )
198
- (blocks): ModuleList(
199
- (0): ResidualBlock(
200
- (convs1): ModuleList(
201
- (0): Sequential(
202
- (0): LeakyReLU(negative_slope=0.1)
203
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
204
- )
205
- (1): Sequential(
206
- (0): LeakyReLU(negative_slope=0.1)
207
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
208
- )
209
- (2): Sequential(
210
- (0): LeakyReLU(negative_slope=0.1)
211
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
212
- )
213
- )
214
- (convs2): ModuleList(
215
- (0-2): 3 x Sequential(
216
- (0): LeakyReLU(negative_slope=0.1)
217
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
218
- )
219
- )
220
- )
221
- (1): ResidualBlock(
222
- (convs1): ModuleList(
223
- (0): Sequential(
224
- (0): LeakyReLU(negative_slope=0.1)
225
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
226
- )
227
- (1): Sequential(
228
- (0): LeakyReLU(negative_slope=0.1)
229
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
230
- )
231
- (2): Sequential(
232
- (0): LeakyReLU(negative_slope=0.1)
233
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
234
- )
235
- )
236
- (convs2): ModuleList(
237
- (0-2): 3 x Sequential(
238
- (0): LeakyReLU(negative_slope=0.1)
239
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
240
- )
241
- )
242
- )
243
- (2): ResidualBlock(
244
- (convs1): ModuleList(
245
- (0): Sequential(
246
- (0): LeakyReLU(negative_slope=0.1)
247
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
248
- )
249
- (1): Sequential(
250
- (0): LeakyReLU(negative_slope=0.1)
251
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
252
- )
253
- (2): Sequential(
254
- (0): LeakyReLU(negative_slope=0.1)
255
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
256
- )
257
- )
258
- (convs2): ModuleList(
259
- (0-2): 3 x Sequential(
260
- (0): LeakyReLU(negative_slope=0.1)
261
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
262
- )
263
- )
264
- )
265
- (3): ResidualBlock(
266
- (convs1): ModuleList(
267
- (0): Sequential(
268
- (0): LeakyReLU(negative_slope=0.1)
269
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
270
- )
271
- (1): Sequential(
272
- (0): LeakyReLU(negative_slope=0.1)
273
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
274
- )
275
- (2): Sequential(
276
- (0): LeakyReLU(negative_slope=0.1)
277
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
278
- )
279
- )
280
- (convs2): ModuleList(
281
- (0-2): 3 x Sequential(
282
- (0): LeakyReLU(negative_slope=0.1)
283
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
284
- )
285
- )
286
- )
287
- (4): ResidualBlock(
288
- (convs1): ModuleList(
289
- (0): Sequential(
290
- (0): LeakyReLU(negative_slope=0.1)
291
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
292
- )
293
- (1): Sequential(
294
- (0): LeakyReLU(negative_slope=0.1)
295
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
296
- )
297
- (2): Sequential(
298
- (0): LeakyReLU(negative_slope=0.1)
299
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
300
- )
301
- )
302
- (convs2): ModuleList(
303
- (0-2): 3 x Sequential(
304
- (0): LeakyReLU(negative_slope=0.1)
305
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
306
- )
307
- )
308
- )
309
- (5): ResidualBlock(
310
- (convs1): ModuleList(
311
- (0): Sequential(
312
- (0): LeakyReLU(negative_slope=0.1)
313
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
314
- )
315
- (1): Sequential(
316
- (0): LeakyReLU(negative_slope=0.1)
317
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
318
- )
319
- (2): Sequential(
320
- (0): LeakyReLU(negative_slope=0.1)
321
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
322
- )
323
- )
324
- (convs2): ModuleList(
325
- (0-2): 3 x Sequential(
326
- (0): LeakyReLU(negative_slope=0.1)
327
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
328
- )
329
- )
330
- )
331
- (6): ResidualBlock(
332
- (convs1): ModuleList(
333
- (0): Sequential(
334
- (0): LeakyReLU(negative_slope=0.1)
335
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
336
- )
337
- (1): Sequential(
338
- (0): LeakyReLU(negative_slope=0.1)
339
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
340
- )
341
- (2): Sequential(
342
- (0): LeakyReLU(negative_slope=0.1)
343
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
344
- )
345
- )
346
- (convs2): ModuleList(
347
- (0-2): 3 x Sequential(
348
- (0): LeakyReLU(negative_slope=0.1)
349
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
350
- )
351
- )
352
- )
353
- (7): ResidualBlock(
354
- (convs1): ModuleList(
355
- (0): Sequential(
356
- (0): LeakyReLU(negative_slope=0.1)
357
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
358
- )
359
- (1): Sequential(
360
- (0): LeakyReLU(negative_slope=0.1)
361
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
362
- )
363
- (2): Sequential(
364
- (0): LeakyReLU(negative_slope=0.1)
365
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
366
- )
367
- )
368
- (convs2): ModuleList(
369
- (0-2): 3 x Sequential(
370
- (0): LeakyReLU(negative_slope=0.1)
371
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
372
- )
373
- )
374
- )
375
- (8): ResidualBlock(
376
- (convs1): ModuleList(
377
- (0): Sequential(
378
- (0): LeakyReLU(negative_slope=0.1)
379
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
380
- )
381
- (1): Sequential(
382
- (0): LeakyReLU(negative_slope=0.1)
383
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
384
- )
385
- (2): Sequential(
386
- (0): LeakyReLU(negative_slope=0.1)
387
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
388
- )
389
- )
390
- (convs2): ModuleList(
391
- (0-2): 3 x Sequential(
392
- (0): LeakyReLU(negative_slope=0.1)
393
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
394
- )
395
- )
396
- )
397
- (9): ResidualBlock(
398
- (convs1): ModuleList(
399
- (0): Sequential(
400
- (0): LeakyReLU(negative_slope=0.1)
401
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
402
- )
403
- (1): Sequential(
404
- (0): LeakyReLU(negative_slope=0.1)
405
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
406
- )
407
- (2): Sequential(
408
- (0): LeakyReLU(negative_slope=0.1)
409
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
410
- )
411
- )
412
- (convs2): ModuleList(
413
- (0-2): 3 x Sequential(
414
- (0): LeakyReLU(negative_slope=0.1)
415
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
416
- )
417
- )
418
- )
419
- (10): ResidualBlock(
420
- (convs1): ModuleList(
421
- (0): Sequential(
422
- (0): LeakyReLU(negative_slope=0.1)
423
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
424
- )
425
- (1): Sequential(
426
- (0): LeakyReLU(negative_slope=0.1)
427
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
428
- )
429
- (2): Sequential(
430
- (0): LeakyReLU(negative_slope=0.1)
431
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
432
- )
433
- )
434
- (convs2): ModuleList(
435
- (0-2): 3 x Sequential(
436
- (0): LeakyReLU(negative_slope=0.1)
437
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
438
- )
439
- )
440
- )
441
- (11): ResidualBlock(
442
- (convs1): ModuleList(
443
- (0): Sequential(
444
- (0): LeakyReLU(negative_slope=0.1)
445
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
446
- )
447
- (1): Sequential(
448
- (0): LeakyReLU(negative_slope=0.1)
449
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
450
- )
451
- (2): Sequential(
452
- (0): LeakyReLU(negative_slope=0.1)
453
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
454
- )
455
- )
456
- (convs2): ModuleList(
457
- (0-2): 3 x Sequential(
458
- (0): LeakyReLU(negative_slope=0.1)
459
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
460
- )
461
- )
462
- )
463
- )
464
- (output_conv): Sequential(
465
- (0): LeakyReLU(negative_slope=0.01)
466
- (1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
467
- (2): Tanh()
468
- )
469
- (global_conv): Conv1d(256, 512, kernel_size=(1,), stride=(1,))
470
- )
471
- (posterior_encoder): PosteriorEncoder(
472
- (input_conv): Conv1d(80, 192, kernel_size=(1,), stride=(1,))
473
- (encoder): WaveNet(
474
- (conv_layers): ModuleList(
475
- (0-15): 16 x ResidualBlock(
476
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
477
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
478
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
479
- )
480
- )
481
- )
482
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
483
- )
484
- (flow): ResidualAffineCouplingBlock(
485
- (flows): ModuleList(
486
- (0): ResidualAffineCouplingLayer(
487
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
488
- (encoder): WaveNet(
489
- (conv_layers): ModuleList(
490
- (0-3): 4 x ResidualBlock(
491
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
492
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
493
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
494
- )
495
- )
496
- )
497
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
498
- )
499
- (1): FlipFlow()
500
- (2): ResidualAffineCouplingLayer(
501
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
502
- (encoder): WaveNet(
503
- (conv_layers): ModuleList(
504
- (0-3): 4 x ResidualBlock(
505
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
506
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
507
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
508
- )
509
- )
510
- )
511
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
512
- )
513
- (3): FlipFlow()
514
- (4): ResidualAffineCouplingLayer(
515
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
516
- (encoder): WaveNet(
517
- (conv_layers): ModuleList(
518
- (0-3): 4 x ResidualBlock(
519
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
520
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
521
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
522
- )
523
- )
524
- )
525
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
526
- )
527
- (5): FlipFlow()
528
- (6): ResidualAffineCouplingLayer(
529
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
530
- (encoder): WaveNet(
531
- (conv_layers): ModuleList(
532
- (0-3): 4 x ResidualBlock(
533
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
534
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
535
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
536
- )
537
- )
538
- )
539
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
540
- )
541
- (7): FlipFlow()
542
- )
543
- )
544
- (duration_predictor): StochasticDurationPredictor(
545
- (pre): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
546
- (dds): DilatedDepthSeparableConv(
547
- (convs): ModuleList(
548
- (0): Sequential(
549
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
550
- (1): Transpose()
551
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
552
- (3): Transpose()
553
- (4): GELU(approximate='none')
554
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
555
- (6): Transpose()
556
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
557
- (8): Transpose()
558
- (9): GELU(approximate='none')
559
- (10): Dropout(p=0.5, inplace=False)
560
- )
561
- (1): Sequential(
562
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
563
- (1): Transpose()
564
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
565
- (3): Transpose()
566
- (4): GELU(approximate='none')
567
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
568
- (6): Transpose()
569
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
570
- (8): Transpose()
571
- (9): GELU(approximate='none')
572
- (10): Dropout(p=0.5, inplace=False)
573
- )
574
- (2): Sequential(
575
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
576
- (1): Transpose()
577
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
578
- (3): Transpose()
579
- (4): GELU(approximate='none')
580
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
581
- (6): Transpose()
582
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
583
- (8): Transpose()
584
- (9): GELU(approximate='none')
585
- (10): Dropout(p=0.5, inplace=False)
586
- )
587
- )
588
- )
589
- (proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
590
- (log_flow): LogFlow()
591
- (flows): ModuleList(
592
- (0): ElementwiseAffineFlow()
593
- (1): ConvFlow(
594
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
595
- (dds_conv): DilatedDepthSeparableConv(
596
- (convs): ModuleList(
597
- (0): Sequential(
598
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
599
- (1): Transpose()
600
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
601
- (3): Transpose()
602
- (4): GELU(approximate='none')
603
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
604
- (6): Transpose()
605
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
606
- (8): Transpose()
607
- (9): GELU(approximate='none')
608
- (10): Dropout(p=0.0, inplace=False)
609
- )
610
- (1): Sequential(
611
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
612
- (1): Transpose()
613
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
614
- (3): Transpose()
615
- (4): GELU(approximate='none')
616
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
617
- (6): Transpose()
618
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
619
- (8): Transpose()
620
- (9): GELU(approximate='none')
621
- (10): Dropout(p=0.0, inplace=False)
622
- )
623
- (2): Sequential(
624
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
625
- (1): Transpose()
626
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
627
- (3): Transpose()
628
- (4): GELU(approximate='none')
629
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
630
- (6): Transpose()
631
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
632
- (8): Transpose()
633
- (9): GELU(approximate='none')
634
- (10): Dropout(p=0.0, inplace=False)
635
- )
636
- )
637
- )
638
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
639
- )
640
- (2): FlipFlow()
641
- (3): ConvFlow(
642
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
643
- (dds_conv): DilatedDepthSeparableConv(
644
- (convs): ModuleList(
645
- (0): Sequential(
646
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
647
- (1): Transpose()
648
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
649
- (3): Transpose()
650
- (4): GELU(approximate='none')
651
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
652
- (6): Transpose()
653
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
654
- (8): Transpose()
655
- (9): GELU(approximate='none')
656
- (10): Dropout(p=0.0, inplace=False)
657
- )
658
- (1): Sequential(
659
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
660
- (1): Transpose()
661
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
662
- (3): Transpose()
663
- (4): GELU(approximate='none')
664
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
665
- (6): Transpose()
666
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
667
- (8): Transpose()
668
- (9): GELU(approximate='none')
669
- (10): Dropout(p=0.0, inplace=False)
670
- )
671
- (2): Sequential(
672
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
673
- (1): Transpose()
674
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
675
- (3): Transpose()
676
- (4): GELU(approximate='none')
677
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
678
- (6): Transpose()
679
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
680
- (8): Transpose()
681
- (9): GELU(approximate='none')
682
- (10): Dropout(p=0.0, inplace=False)
683
- )
684
- )
685
- )
686
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
687
- )
688
- (4): FlipFlow()
689
- (5): ConvFlow(
690
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
691
- (dds_conv): DilatedDepthSeparableConv(
692
- (convs): ModuleList(
693
- (0): Sequential(
694
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
695
- (1): Transpose()
696
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
697
- (3): Transpose()
698
- (4): GELU(approximate='none')
699
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
700
- (6): Transpose()
701
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
702
- (8): Transpose()
703
- (9): GELU(approximate='none')
704
- (10): Dropout(p=0.0, inplace=False)
705
- )
706
- (1): Sequential(
707
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
708
- (1): Transpose()
709
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
710
- (3): Transpose()
711
- (4): GELU(approximate='none')
712
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
713
- (6): Transpose()
714
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
715
- (8): Transpose()
716
- (9): GELU(approximate='none')
717
- (10): Dropout(p=0.0, inplace=False)
718
- )
719
- (2): Sequential(
720
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
721
- (1): Transpose()
722
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
723
- (3): Transpose()
724
- (4): GELU(approximate='none')
725
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
726
- (6): Transpose()
727
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
728
- (8): Transpose()
729
- (9): GELU(approximate='none')
730
- (10): Dropout(p=0.0, inplace=False)
731
- )
732
- )
733
- )
734
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
735
- )
736
- (6): FlipFlow()
737
- (7): ConvFlow(
738
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
739
- (dds_conv): DilatedDepthSeparableConv(
740
- (convs): ModuleList(
741
- (0): Sequential(
742
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
743
- (1): Transpose()
744
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
745
- (3): Transpose()
746
- (4): GELU(approximate='none')
747
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
748
- (6): Transpose()
749
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
750
- (8): Transpose()
751
- (9): GELU(approximate='none')
752
- (10): Dropout(p=0.0, inplace=False)
753
- )
754
- (1): Sequential(
755
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
756
- (1): Transpose()
757
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
758
- (3): Transpose()
759
- (4): GELU(approximate='none')
760
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
761
- (6): Transpose()
762
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
763
- (8): Transpose()
764
- (9): GELU(approximate='none')
765
- (10): Dropout(p=0.0, inplace=False)
766
- )
767
- (2): Sequential(
768
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
769
- (1): Transpose()
770
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
771
- (3): Transpose()
772
- (4): GELU(approximate='none')
773
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
774
- (6): Transpose()
775
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
776
- (8): Transpose()
777
- (9): GELU(approximate='none')
778
- (10): Dropout(p=0.0, inplace=False)
779
- )
780
- )
781
- )
782
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
783
- )
784
- (8): FlipFlow()
785
- )
786
- (post_pre): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
787
- (post_dds): DilatedDepthSeparableConv(
788
- (convs): ModuleList(
789
- (0): Sequential(
790
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
791
- (1): Transpose()
792
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
793
- (3): Transpose()
794
- (4): GELU(approximate='none')
795
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
796
- (6): Transpose()
797
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
798
- (8): Transpose()
799
- (9): GELU(approximate='none')
800
- (10): Dropout(p=0.5, inplace=False)
801
- )
802
- (1): Sequential(
803
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
804
- (1): Transpose()
805
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
806
- (3): Transpose()
807
- (4): GELU(approximate='none')
808
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
809
- (6): Transpose()
810
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
811
- (8): Transpose()
812
- (9): GELU(approximate='none')
813
- (10): Dropout(p=0.5, inplace=False)
814
- )
815
- (2): Sequential(
816
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
817
- (1): Transpose()
818
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
819
- (3): Transpose()
820
- (4): GELU(approximate='none')
821
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
822
- (6): Transpose()
823
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
824
- (8): Transpose()
825
- (9): GELU(approximate='none')
826
- (10): Dropout(p=0.5, inplace=False)
827
- )
828
- )
829
- )
830
- (post_proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
831
- (post_flows): ModuleList(
832
- (0): ElementwiseAffineFlow()
833
- (1): ConvFlow(
834
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
835
- (dds_conv): DilatedDepthSeparableConv(
836
- (convs): ModuleList(
837
- (0): Sequential(
838
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
839
- (1): Transpose()
840
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
841
- (3): Transpose()
842
- (4): GELU(approximate='none')
843
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
844
- (6): Transpose()
845
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
846
- (8): Transpose()
847
- (9): GELU(approximate='none')
848
- (10): Dropout(p=0.0, inplace=False)
849
- )
850
- (1): Sequential(
851
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
852
- (1): Transpose()
853
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
854
- (3): Transpose()
855
- (4): GELU(approximate='none')
856
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
857
- (6): Transpose()
858
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
859
- (8): Transpose()
860
- (9): GELU(approximate='none')
861
- (10): Dropout(p=0.0, inplace=False)
862
- )
863
- (2): Sequential(
864
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
865
- (1): Transpose()
866
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
867
- (3): Transpose()
868
- (4): GELU(approximate='none')
869
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
870
- (6): Transpose()
871
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
872
- (8): Transpose()
873
- (9): GELU(approximate='none')
874
- (10): Dropout(p=0.0, inplace=False)
875
- )
876
- )
877
- )
878
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
879
- )
880
- (2): FlipFlow()
881
- (3): ConvFlow(
882
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
883
- (dds_conv): DilatedDepthSeparableConv(
884
- (convs): ModuleList(
885
- (0): Sequential(
886
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
887
- (1): Transpose()
888
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
889
- (3): Transpose()
890
- (4): GELU(approximate='none')
891
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
892
- (6): Transpose()
893
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
894
- (8): Transpose()
895
- (9): GELU(approximate='none')
896
- (10): Dropout(p=0.0, inplace=False)
897
- )
898
- (1): Sequential(
899
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
900
- (1): Transpose()
901
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
902
- (3): Transpose()
903
- (4): GELU(approximate='none')
904
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
905
- (6): Transpose()
906
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
907
- (8): Transpose()
908
- (9): GELU(approximate='none')
909
- (10): Dropout(p=0.0, inplace=False)
910
- )
911
- (2): Sequential(
912
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
913
- (1): Transpose()
914
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
915
- (3): Transpose()
916
- (4): GELU(approximate='none')
917
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
918
- (6): Transpose()
919
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
920
- (8): Transpose()
921
- (9): GELU(approximate='none')
922
- (10): Dropout(p=0.0, inplace=False)
923
- )
924
- )
925
- )
926
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
927
- )
928
- (4): FlipFlow()
929
- (5): ConvFlow(
930
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
931
- (dds_conv): DilatedDepthSeparableConv(
932
- (convs): ModuleList(
933
- (0): Sequential(
934
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
935
- (1): Transpose()
936
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
937
- (3): Transpose()
938
- (4): GELU(approximate='none')
939
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
940
- (6): Transpose()
941
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
942
- (8): Transpose()
943
- (9): GELU(approximate='none')
944
- (10): Dropout(p=0.0, inplace=False)
945
- )
946
- (1): Sequential(
947
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
948
- (1): Transpose()
949
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
950
- (3): Transpose()
951
- (4): GELU(approximate='none')
952
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
953
- (6): Transpose()
954
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
955
- (8): Transpose()
956
- (9): GELU(approximate='none')
957
- (10): Dropout(p=0.0, inplace=False)
958
- )
959
- (2): Sequential(
960
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
961
- (1): Transpose()
962
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
963
- (3): Transpose()
964
- (4): GELU(approximate='none')
965
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
966
- (6): Transpose()
967
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
968
- (8): Transpose()
969
- (9): GELU(approximate='none')
970
- (10): Dropout(p=0.0, inplace=False)
971
- )
972
- )
973
- )
974
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
975
- )
976
- (6): FlipFlow()
977
- (7): ConvFlow(
978
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
979
- (dds_conv): DilatedDepthSeparableConv(
980
- (convs): ModuleList(
981
- (0): Sequential(
982
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
983
- (1): Transpose()
984
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
985
- (3): Transpose()
986
- (4): GELU(approximate='none')
987
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
988
- (6): Transpose()
989
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
990
- (8): Transpose()
991
- (9): GELU(approximate='none')
992
- (10): Dropout(p=0.0, inplace=False)
993
- )
994
- (1): Sequential(
995
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
996
- (1): Transpose()
997
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
998
- (3): Transpose()
999
- (4): GELU(approximate='none')
1000
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1001
- (6): Transpose()
1002
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1003
- (8): Transpose()
1004
- (9): GELU(approximate='none')
1005
- (10): Dropout(p=0.0, inplace=False)
1006
- )
1007
- (2): Sequential(
1008
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
1009
- (1): Transpose()
1010
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1011
- (3): Transpose()
1012
- (4): GELU(approximate='none')
1013
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1014
- (6): Transpose()
1015
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1016
- (8): Transpose()
1017
- (9): GELU(approximate='none')
1018
- (10): Dropout(p=0.0, inplace=False)
1019
- )
1020
- )
1021
- )
1022
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
1023
- )
1024
- (8): FlipFlow()
1025
- )
1026
- (global_conv): Conv1d(256, 192, kernel_size=(1,), stride=(1,))
1027
- )
1028
- (global_emb): Embedding(4, 256)
1029
- )
1030
- (discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
1031
- (msd): HiFiGANMultiScaleDiscriminator(
1032
- (discriminators): ModuleList(
1033
- (0): HiFiGANScaleDiscriminator(
1034
- (layers): ModuleList(
1035
- (0): Sequential(
1036
- (0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
1037
- (1): LeakyReLU(negative_slope=0.1)
1038
- )
1039
- (1): Sequential(
1040
- (0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
1041
- (1): LeakyReLU(negative_slope=0.1)
1042
- )
1043
- (2): Sequential(
1044
- (0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
1045
- (1): LeakyReLU(negative_slope=0.1)
1046
- )
1047
- (3): Sequential(
1048
- (0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1049
- (1): LeakyReLU(negative_slope=0.1)
1050
- )
1051
- (4): Sequential(
1052
- (0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1053
- (1): LeakyReLU(negative_slope=0.1)
1054
- )
1055
- (5): Sequential(
1056
- (0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
1057
- (1): LeakyReLU(negative_slope=0.1)
1058
- )
1059
- (6): Sequential(
1060
- (0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
1061
- (1): LeakyReLU(negative_slope=0.1)
1062
- )
1063
- (7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
1064
- )
1065
- )
1066
- )
1067
- )
1068
- (mpd): HiFiGANMultiPeriodDiscriminator(
1069
- (discriminators): ModuleList(
1070
- (0-4): 5 x HiFiGANPeriodDiscriminator(
1071
- (convs): ModuleList(
1072
- (0): Sequential(
1073
- (0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1074
- (1): LeakyReLU(negative_slope=0.1)
1075
- )
1076
- (1): Sequential(
1077
- (0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1078
- (1): LeakyReLU(negative_slope=0.1)
1079
- )
1080
- (2): Sequential(
1081
- (0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1082
- (1): LeakyReLU(negative_slope=0.1)
1083
- )
1084
- (3): Sequential(
1085
- (0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1086
- (1): LeakyReLU(negative_slope=0.1)
1087
- )
1088
- (4): Sequential(
1089
- (0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
1090
- (1): LeakyReLU(negative_slope=0.1)
1091
- )
1092
- )
1093
- (output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
1094
- )
1095
- )
1096
- )
1097
- )
1098
- (generator_adv_loss): GeneratorAdversarialLoss()
1099
- (discriminator_adv_loss): DiscriminatorAdversarialLoss()
1100
- (feat_match_loss): FeatureMatchLoss()
1101
- (mel_loss): MelSpectrogramLoss(
1102
- (wav_to_mel): LogMelFbank(
1103
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
1104
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=11025.0, htk=False)
1105
- )
1106
- )
1107
- (kl_loss): KLDivergenceLoss()
1108
- )
1109
- )
1110
-
1111
- Model summary:
1112
- Class Name: ESPnetGANTTSModel
1113
- Total Number of model parameters: 96.24 M
1114
- Number of trainable parameters: 96.24 M (100.0%)
1115
- Size: 384.96 MB
1116
- Type: torch.float32
1117
- [wieling-3-a100] 2023-12-01 15:58:41,847 (abs_task:1272) INFO: Optimizer:
1118
- AdamW (
1119
- Parameter Group 0
1120
- amsgrad: False
1121
- betas: [0.8, 0.99]
1122
- capturable: False
1123
- differentiable: False
1124
- eps: 1e-09
1125
- foreach: None
1126
- fused: None
1127
- initial_lr: 0.0003
1128
- lr: 0.0003
1129
- maximize: False
1130
- weight_decay: 0.0
1131
- )
1132
- [wieling-3-a100] 2023-12-01 15:58:41,847 (abs_task:1273) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f9de23eb8b0>
1133
- [wieling-3-a100] 2023-12-01 15:58:41,847 (abs_task:1272) INFO: Optimizer2:
1134
- AdamW (
1135
- Parameter Group 0
1136
- amsgrad: False
1137
- betas: [0.8, 0.99]
1138
- capturable: False
1139
- differentiable: False
1140
- eps: 1e-09
1141
- foreach: None
1142
- fused: None
1143
- initial_lr: 0.0003
1144
- lr: 0.0003
1145
- maximize: False
1146
- weight_decay: 0.0
1147
- )
1148
- [wieling-3-a100] 2023-12-01 15:58:41,847 (abs_task:1273) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f9de23eb850>
1149
- [wieling-3-a100] 2023-12-01 15:58:41,848 (abs_task:1282) INFO: Saving the configuration in exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/config.yaml
1150
- [wieling-3-a100] 2023-12-01 15:58:41,866 (abs_task:1293) INFO: Namespace(config='conf/train_vits.yaml', print_config=False, log_level='INFO', drop_last_iter=False, dry_run=False, iterator_type='sequence', valid_iterator_type=None, output_dir='exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11', ngpu=0, seed=67823, num_workers=4, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=True, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=False, collect_stats=True, write_collected_feats=False, max_epoch=1000, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[['train', 'total_count', 'max']], keep_nbest_models=10, nbest_averaging_interval=0, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=50, use_matplotlib=True, use_tensorboard=True, create_graph_in_tensorboard=False, use_wandb=True, wandb_project='GROTTS', wandb_id=None, wandb_entity=None, wandb_name='VITS_lr_3.0e-4', wandb_model_log_interval=-1, detect_anomaly=False, use_lora=False, save_lora_only=True, lora_conf={}, pretrain_path=None, init_param=['downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv'], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=1000, batch_size=40, valid_batch_size=None, batch_bins=10000000, valid_batch_bins=None, train_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.11.scp'], valid_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.11.scp'], batch_type='numel', valid_batch_type=None, fold_length=[], sort_in_batch='descending', shuffle_within_batch=False, sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, chunk_excluded_key_prefixes=[], chunk_default_fs=None, train_data_path_and_name_and_type=[('dump/raw/train_nodev/text', 'text', 'text'), ('dump/raw/train_nodev/wav.scp', 'speech', 'sound'), ('dump/raw/train_nodev/utt2sid', 'sids', 'text_int')], valid_data_path_and_name_and_type=[('dump/raw/train_dev/text', 'text', 'text'), ('dump/raw/train_dev/wav.scp', 'speech', 'sound'), ('dump/raw/train_dev/utt2sid', 'sids', 'text_int')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, allow_multi_rates=False, valid_max_cache_size=None, exclude_weight_decay=False, exclude_weight_decay_conf={}, optim='adamw', optim_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler='exponentiallr', scheduler_conf={'gamma': 0.999875}, optim2='adamw', optim2_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, generator_first=False, token_list=['<blank>', '<unk>', '<space>', 'e', 'n', 'a', 'o', 't', 'i', 'r', 'd', 's', 'k', 'l', 'm', 'u', 'g', 'h', 'w', 'v', '.', 'z', 'b', 'p', ',', 'j', 'c', 'f', '‘', '’', ':', '?', 'ö', "'", '!', '-', ';', 'ò', 'è', 'ì', 'é', 'y', 'ë', 'x', 'q', '<sos/eos>'], odim=None, model_conf={}, use_preprocessor=True, token_type='char', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, feats_extract='fbank', feats_extract_conf={'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'fs': 22050, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, normalize=None, normalize_conf={}, tts='vits', tts_conf={'generator_type': 'vits_generator', 'generator_params': {'hidden_channels': 192, 'spks': 4, 'global_channels': 256, 'segment_size': 32, 'text_encoder_attention_heads': 2, 'text_encoder_ffn_expand': 4, 'text_encoder_blocks': 6, 'text_encoder_positionwise_layer_type': 'conv1d', 'text_encoder_positionwise_conv_kernel_size': 3, 'text_encoder_positional_encoding_layer_type': 'rel_pos', 'text_encoder_self_attention_layer_type': 'rel_selfattn', 'text_encoder_activation_type': 'swish', 'text_encoder_normalize_before': True, 'text_encoder_dropout_rate': 0.1, 'text_encoder_positional_dropout_rate': 0.0, 'text_encoder_attention_dropout_rate': 0.1, 'use_macaron_style_in_text_encoder': True, 'use_conformer_conv_in_text_encoder': False, 'text_encoder_conformer_kernel_size': -1, 'decoder_kernel_size': 7, 'decoder_channels': 512, 'decoder_upsample_scales': [8, 8, 2, 2], 'decoder_upsample_kernel_sizes': [16, 16, 4, 4], 'decoder_resblock_kernel_sizes': [3, 7, 11], 'decoder_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'use_weight_norm_in_decoder': True, 'posterior_encoder_kernel_size': 5, 'posterior_encoder_layers': 16, 'posterior_encoder_stacks': 1, 'posterior_encoder_base_dilation': 1, 'posterior_encoder_dropout_rate': 0.0, 'use_weight_norm_in_posterior_encoder': True, 'flow_flows': 4, 'flow_kernel_size': 5, 'flow_base_dilation': 1, 'flow_layers': 4, 'flow_dropout_rate': 0.0, 'use_weight_norm_in_flow': True, 'use_only_mean_in_flow': True, 'stochastic_duration_predictor_kernel_size': 3, 'stochastic_duration_predictor_dropout_rate': 0.5, 'stochastic_duration_predictor_flows': 4, 'stochastic_duration_predictor_dds_conv_layers': 3, 'vocabs': 46, 'aux_channels': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': False, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_dur': 1.0, 'lambda_kl': 1.0, 'sampling_rate': 22050, 'cache_generator_outputs': True}, pitch_extract=None, pitch_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, energy_extract=None, energy_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None}, energy_normalize=None, energy_normalize_conf={}, required=['output_dir', 'token_list'], version='202310', distributed=False)
1151
- # Accounting: time=18 threads=1
1152
- # Ended (code 0) at Fri Dec 1 15:58:52 UTC 2023, elapsed time 18 seconds
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/config.yaml DELETED
@@ -1,383 +0,0 @@
1
- config: conf/train_vits.yaml
2
- print_config: false
3
- log_level: INFO
4
- drop_last_iter: false
5
- dry_run: false
6
- iterator_type: sequence
7
- valid_iterator_type: null
8
- output_dir: exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11
9
- ngpu: 0
10
- seed: 67823
11
- num_workers: 4
12
- num_att_plot: 3
13
- dist_backend: nccl
14
- dist_init_method: env://
15
- dist_world_size: null
16
- dist_rank: null
17
- local_rank: null
18
- dist_master_addr: null
19
- dist_master_port: null
20
- dist_launcher: null
21
- multiprocessing_distributed: false
22
- unused_parameters: true
23
- sharded_ddp: false
24
- cudnn_enabled: true
25
- cudnn_benchmark: false
26
- cudnn_deterministic: false
27
- collect_stats: true
28
- write_collected_feats: false
29
- max_epoch: 1000
30
- patience: null
31
- val_scheduler_criterion:
32
- - valid
33
- - loss
34
- early_stopping_criterion:
35
- - valid
36
- - loss
37
- - min
38
- best_model_criterion:
39
- - - train
40
- - total_count
41
- - max
42
- keep_nbest_models: 10
43
- nbest_averaging_interval: 0
44
- grad_clip: -1
45
- grad_clip_type: 2.0
46
- grad_noise: false
47
- accum_grad: 1
48
- no_forward_run: false
49
- resume: false
50
- train_dtype: float32
51
- use_amp: false
52
- log_interval: 50
53
- use_matplotlib: true
54
- use_tensorboard: true
55
- create_graph_in_tensorboard: false
56
- use_wandb: true
57
- wandb_project: GROTTS
58
- wandb_id: null
59
- wandb_entity: null
60
- wandb_name: VITS_lr_3.0e-4
61
- wandb_model_log_interval: -1
62
- detect_anomaly: false
63
- use_lora: false
64
- save_lora_only: true
65
- lora_conf: {}
66
- pretrain_path: null
67
- init_param:
68
- - downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv
69
- ignore_init_mismatch: false
70
- freeze_param: []
71
- num_iters_per_epoch: 1000
72
- batch_size: 40
73
- valid_batch_size: null
74
- batch_bins: 10000000
75
- valid_batch_bins: null
76
- train_shape_file:
77
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.11.scp
78
- valid_shape_file:
79
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.11.scp
80
- batch_type: numel
81
- valid_batch_type: null
82
- fold_length: []
83
- sort_in_batch: descending
84
- shuffle_within_batch: false
85
- sort_batch: descending
86
- multiple_iterator: false
87
- chunk_length: 500
88
- chunk_shift_ratio: 0.5
89
- num_cache_chunks: 1024
90
- chunk_excluded_key_prefixes: []
91
- chunk_default_fs: null
92
- train_data_path_and_name_and_type:
93
- - - dump/raw/train_nodev/text
94
- - text
95
- - text
96
- - - dump/raw/train_nodev/wav.scp
97
- - speech
98
- - sound
99
- - - dump/raw/train_nodev/utt2sid
100
- - sids
101
- - text_int
102
- valid_data_path_and_name_and_type:
103
- - - dump/raw/train_dev/text
104
- - text
105
- - text
106
- - - dump/raw/train_dev/wav.scp
107
- - speech
108
- - sound
109
- - - dump/raw/train_dev/utt2sid
110
- - sids
111
- - text_int
112
- allow_variable_data_keys: false
113
- max_cache_size: 0.0
114
- max_cache_fd: 32
115
- allow_multi_rates: false
116
- valid_max_cache_size: null
117
- exclude_weight_decay: false
118
- exclude_weight_decay_conf: {}
119
- optim: adamw
120
- optim_conf:
121
- lr: 0.0003
122
- betas:
123
- - 0.8
124
- - 0.99
125
- eps: 1.0e-09
126
- weight_decay: 0.0
127
- scheduler: exponentiallr
128
- scheduler_conf:
129
- gamma: 0.999875
130
- optim2: adamw
131
- optim2_conf:
132
- lr: 0.0003
133
- betas:
134
- - 0.8
135
- - 0.99
136
- eps: 1.0e-09
137
- weight_decay: 0.0
138
- scheduler2: exponentiallr
139
- scheduler2_conf:
140
- gamma: 0.999875
141
- generator_first: false
142
- token_list:
143
- - <blank>
144
- - <unk>
145
- - <space>
146
- - e
147
- - n
148
- - a
149
- - o
150
- - t
151
- - i
152
- - r
153
- - d
154
- - s
155
- - k
156
- - l
157
- - m
158
- - u
159
- - g
160
- - h
161
- - w
162
- - v
163
- - .
164
- - z
165
- - b
166
- - p
167
- - ','
168
- - j
169
- - c
170
- - f
171
- - ‘
172
- - ’
173
- - ':'
174
- - '?'
175
- - ö
176
- - ''''
177
- - '!'
178
- - '-'
179
- - ;
180
- - ò
181
- - è
182
- - ì
183
- - é
184
- - y
185
- - ë
186
- - x
187
- - q
188
- - <sos/eos>
189
- odim: null
190
- model_conf: {}
191
- use_preprocessor: true
192
- token_type: char
193
- bpemodel: null
194
- non_linguistic_symbols: null
195
- cleaner: null
196
- g2p: null
197
- feats_extract: fbank
198
- feats_extract_conf:
199
- n_fft: 1024
200
- hop_length: 256
201
- win_length: null
202
- fs: 22050
203
- fmin: 80
204
- fmax: 7600
205
- n_mels: 80
206
- normalize: null
207
- normalize_conf: {}
208
- tts: vits
209
- tts_conf:
210
- generator_type: vits_generator
211
- generator_params:
212
- hidden_channels: 192
213
- spks: 4
214
- global_channels: 256
215
- segment_size: 32
216
- text_encoder_attention_heads: 2
217
- text_encoder_ffn_expand: 4
218
- text_encoder_blocks: 6
219
- text_encoder_positionwise_layer_type: conv1d
220
- text_encoder_positionwise_conv_kernel_size: 3
221
- text_encoder_positional_encoding_layer_type: rel_pos
222
- text_encoder_self_attention_layer_type: rel_selfattn
223
- text_encoder_activation_type: swish
224
- text_encoder_normalize_before: true
225
- text_encoder_dropout_rate: 0.1
226
- text_encoder_positional_dropout_rate: 0.0
227
- text_encoder_attention_dropout_rate: 0.1
228
- use_macaron_style_in_text_encoder: true
229
- use_conformer_conv_in_text_encoder: false
230
- text_encoder_conformer_kernel_size: -1
231
- decoder_kernel_size: 7
232
- decoder_channels: 512
233
- decoder_upsample_scales:
234
- - 8
235
- - 8
236
- - 2
237
- - 2
238
- decoder_upsample_kernel_sizes:
239
- - 16
240
- - 16
241
- - 4
242
- - 4
243
- decoder_resblock_kernel_sizes:
244
- - 3
245
- - 7
246
- - 11
247
- decoder_resblock_dilations:
248
- - - 1
249
- - 3
250
- - 5
251
- - - 1
252
- - 3
253
- - 5
254
- - - 1
255
- - 3
256
- - 5
257
- use_weight_norm_in_decoder: true
258
- posterior_encoder_kernel_size: 5
259
- posterior_encoder_layers: 16
260
- posterior_encoder_stacks: 1
261
- posterior_encoder_base_dilation: 1
262
- posterior_encoder_dropout_rate: 0.0
263
- use_weight_norm_in_posterior_encoder: true
264
- flow_flows: 4
265
- flow_kernel_size: 5
266
- flow_base_dilation: 1
267
- flow_layers: 4
268
- flow_dropout_rate: 0.0
269
- use_weight_norm_in_flow: true
270
- use_only_mean_in_flow: true
271
- stochastic_duration_predictor_kernel_size: 3
272
- stochastic_duration_predictor_dropout_rate: 0.5
273
- stochastic_duration_predictor_flows: 4
274
- stochastic_duration_predictor_dds_conv_layers: 3
275
- vocabs: 46
276
- aux_channels: 80
277
- discriminator_type: hifigan_multi_scale_multi_period_discriminator
278
- discriminator_params:
279
- scales: 1
280
- scale_downsample_pooling: AvgPool1d
281
- scale_downsample_pooling_params:
282
- kernel_size: 4
283
- stride: 2
284
- padding: 2
285
- scale_discriminator_params:
286
- in_channels: 1
287
- out_channels: 1
288
- kernel_sizes:
289
- - 15
290
- - 41
291
- - 5
292
- - 3
293
- channels: 128
294
- max_downsample_channels: 1024
295
- max_groups: 16
296
- bias: true
297
- downsample_scales:
298
- - 2
299
- - 2
300
- - 4
301
- - 4
302
- - 1
303
- nonlinear_activation: LeakyReLU
304
- nonlinear_activation_params:
305
- negative_slope: 0.1
306
- use_weight_norm: false
307
- use_spectral_norm: false
308
- follow_official_norm: false
309
- periods:
310
- - 2
311
- - 3
312
- - 5
313
- - 7
314
- - 11
315
- period_discriminator_params:
316
- in_channels: 1
317
- out_channels: 1
318
- kernel_sizes:
319
- - 5
320
- - 3
321
- channels: 32
322
- downsample_scales:
323
- - 3
324
- - 3
325
- - 3
326
- - 3
327
- - 1
328
- max_downsample_channels: 1024
329
- bias: true
330
- nonlinear_activation: LeakyReLU
331
- nonlinear_activation_params:
332
- negative_slope: 0.1
333
- use_weight_norm: true
334
- use_spectral_norm: false
335
- generator_adv_loss_params:
336
- average_by_discriminators: false
337
- loss_type: mse
338
- discriminator_adv_loss_params:
339
- average_by_discriminators: false
340
- loss_type: mse
341
- feat_match_loss_params:
342
- average_by_discriminators: false
343
- average_by_layers: false
344
- include_final_outputs: true
345
- mel_loss_params:
346
- fs: 22050
347
- n_fft: 1024
348
- hop_length: 256
349
- win_length: null
350
- window: hann
351
- n_mels: 80
352
- fmin: 0
353
- fmax: null
354
- log_base: null
355
- lambda_adv: 1.0
356
- lambda_mel: 45.0
357
- lambda_feat_match: 2.0
358
- lambda_dur: 1.0
359
- lambda_kl: 1.0
360
- sampling_rate: 22050
361
- cache_generator_outputs: true
362
- pitch_extract: null
363
- pitch_extract_conf:
364
- fs: 22050
365
- n_fft: 1024
366
- hop_length: 256
367
- f0max: 400
368
- f0min: 80
369
- pitch_normalize: null
370
- pitch_normalize_conf: {}
371
- energy_extract: null
372
- energy_extract_conf:
373
- fs: 22050
374
- n_fft: 1024
375
- hop_length: 256
376
- win_length: null
377
- energy_normalize: null
378
- energy_normalize_conf: {}
379
- required:
380
- - output_dir
381
- - token_list
382
- version: '202310'
383
- distributed: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/batch_keys DELETED
@@ -1,3 +0,0 @@
1
- text
2
- speech
3
- sids
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/feats_lengths_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fe5b424d637b31235fd2b5fab1dd19d7aae7171c7ba6a8652d681775beb1a09
3
- size 778
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/feats_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b6b29e56a828f743a9e1137c738e65a6c821a5e18d1c4c8c801663798432712
3
- size 1402
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/sids_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-2553-MoanMorn 1
2
- Hoogelaandsters-2557-MoanMorn 1
3
- Hoogelaandsters-2562-MoanMorn 1
4
- Hoogelaandsters-2567-MoanMorn 1
5
- Hoogelaandsters-2571-MoanMorn 1
6
- Hoogelaandsters-2575-MoanMorn 1
7
- Hoogelaandsters-2579-MoanMorn 1
8
- Hoogelaandsters-2583-MoanMorn 1
9
- Hoogelaandsters-2587-MoanMorn 1
10
- Hoogelaandsters-2592-MoanMorn 1
11
- Hoogelaandsters-2596-MoanMorn 1
12
- Hoogelaandsters-2600-MoanMorn 1
13
- Hoogelaandsters-2604-MoanMorn 1
14
- Hoogelaandsters-2608-MoanMorn 1
15
- Hoogelaandsters-2612-MoanMorn 1
16
- Hoogelaandsters-2616-MoanMorn 1
17
- Hoogelaandsters-2620-MoanMorn 1
18
- Hoogelaandsters-2624-MoanMorn 1
19
- Hoogelaandsters-2628-MoanMorn 1
20
- Hoogelaandsters-2633-MoanMorn 1
21
- Hoogelaandsters-2637-MoanMorn 1
22
- Hoogelaandsters-2641-MoanMorn 1
23
- Hoogelaandsters-2645-MoanMorn 1
24
- Hoogelaandsters-2649-MoanMorn 1
25
- Hoogelaandsters-2653-MoanMorn 1
26
- Hoogelaandsters-2657-MoanMorn 1
27
- Hoogelaandsters-2661-MoanMorn 1
28
- Hoogelaandsters-2665-MoanMorn 1
29
- Hoogelaandsters-2669-MoanMorn 1
30
- Hoogelaandsters-2673-MoanMorn 1
31
- Hoogelaandsters-2678-MoanMorn 1
32
- Hoogelaandsters-2682-MoanMorn 1
33
- Hoogelaandsters-2686-MoanMorn 1
34
- Hoogelaandsters-2690-MoanMorn 1
35
- Hoogelaandsters-2694-MoanMorn 1
36
- Hoogelaandsters-2699-MoanMorn 1
37
- Hoogelaandsters-2703-MoanMorn 1
38
- Hoogelaandsters-2707-MoanMorn 1
39
- Hoogelaandsters-2711-MoanMorn 1
40
- Hoogelaandsters-2715-MoanMorn 1
41
- Hoogelaandsters-2554-MoanMorn 1
42
- Hoogelaandsters-2558-MoanMorn 1
43
- Hoogelaandsters-2564-MoanMorn 1
44
- Hoogelaandsters-2568-MoanMorn 1
45
- Hoogelaandsters-2572-MoanMorn 1
46
- Hoogelaandsters-2576-MoanMorn 1
47
- Hoogelaandsters-2580-MoanMorn 1
48
- Hoogelaandsters-2584-MoanMorn 1
49
- Hoogelaandsters-2588-MoanMorn 1
50
- Hoogelaandsters-2593-MoanMorn 1
51
- Hoogelaandsters-2597-MoanMorn 1
52
- Hoogelaandsters-2601-MoanMorn 1
53
- Hoogelaandsters-2605-MoanMorn 1
54
- Hoogelaandsters-2609-MoanMorn 1
55
- Hoogelaandsters-2613-MoanMorn 1
56
- Hoogelaandsters-2617-MoanMorn 1
57
- Hoogelaandsters-2621-MoanMorn 1
58
- Hoogelaandsters-2625-MoanMorn 1
59
- Hoogelaandsters-2629-MoanMorn 1
60
- Hoogelaandsters-2634-MoanMorn 1
61
- Hoogelaandsters-2638-MoanMorn 1
62
- Hoogelaandsters-2642-MoanMorn 1
63
- Hoogelaandsters-2646-MoanMorn 1
64
- Hoogelaandsters-2650-MoanMorn 1
65
- Hoogelaandsters-2654-MoanMorn 1
66
- Hoogelaandsters-2658-MoanMorn 1
67
- Hoogelaandsters-2662-MoanMorn 1
68
- Hoogelaandsters-2666-MoanMorn 1
69
- Hoogelaandsters-2670-MoanMorn 1
70
- Hoogelaandsters-2674-MoanMorn 1
71
- Hoogelaandsters-2679-MoanMorn 1
72
- Hoogelaandsters-2683-MoanMorn 1
73
- Hoogelaandsters-2687-MoanMorn 1
74
- Hoogelaandsters-2691-MoanMorn 1
75
- Hoogelaandsters-2695-MoanMorn 1
76
- Hoogelaandsters-2700-MoanMorn 1
77
- Hoogelaandsters-2704-MoanMorn 1
78
- Hoogelaandsters-2708-MoanMorn 1
79
- Hoogelaandsters-2712-MoanMorn 1
80
- Hoogelaandsters-2716-MoanMorn 1
81
- Hoogelaandsters-2555-MoanMorn 1
82
- Hoogelaandsters-2559-MoanMorn 1
83
- Hoogelaandsters-2565-MoanMorn 1
84
- Hoogelaandsters-2569-MoanMorn 1
85
- Hoogelaandsters-2573-MoanMorn 1
86
- Hoogelaandsters-2577-MoanMorn 1
87
- Hoogelaandsters-2581-MoanMorn 1
88
- Hoogelaandsters-2585-MoanMorn 1
89
- Hoogelaandsters-2589-MoanMorn 1
90
- Hoogelaandsters-2594-MoanMorn 1
91
- Hoogelaandsters-2598-MoanMorn 1
92
- Hoogelaandsters-2602-MoanMorn 1
93
- Hoogelaandsters-2606-MoanMorn 1
94
- Hoogelaandsters-2610-MoanMorn 1
95
- Hoogelaandsters-2614-MoanMorn 1
96
- Hoogelaandsters-2618-MoanMorn 1
97
- Hoogelaandsters-2622-MoanMorn 1
98
- Hoogelaandsters-2626-MoanMorn 1
99
- Hoogelaandsters-2631-MoanMorn 1
100
- Hoogelaandsters-2635-MoanMorn 1
101
- Hoogelaandsters-2639-MoanMorn 1
102
- Hoogelaandsters-2643-MoanMorn 1
103
- Hoogelaandsters-2647-MoanMorn 1
104
- Hoogelaandsters-2651-MoanMorn 1
105
- Hoogelaandsters-2655-MoanMorn 1
106
- Hoogelaandsters-2659-MoanMorn 1
107
- Hoogelaandsters-2663-MoanMorn 1
108
- Hoogelaandsters-2667-MoanMorn 1
109
- Hoogelaandsters-2671-MoanMorn 1
110
- Hoogelaandsters-2675-MoanMorn 1
111
- Hoogelaandsters-2680-MoanMorn 1
112
- Hoogelaandsters-2684-MoanMorn 1
113
- Hoogelaandsters-2688-MoanMorn 1
114
- Hoogelaandsters-2692-MoanMorn 1
115
- Hoogelaandsters-2696-MoanMorn 1
116
- Hoogelaandsters-2701-MoanMorn 1
117
- Hoogelaandsters-2705-MoanMorn 1
118
- Hoogelaandsters-2709-MoanMorn 1
119
- Hoogelaandsters-2713-MoanMorn 1
120
- Hoogelaandsters-2717-MoanMorn 1
121
- Hoogelaandsters-2556-MoanMorn 1
122
- Hoogelaandsters-2560-MoanMorn 1
123
- Hoogelaandsters-2566-MoanMorn 1
124
- Hoogelaandsters-2570-MoanMorn 1
125
- Hoogelaandsters-2574-MoanMorn 1
126
- Hoogelaandsters-2578-MoanMorn 1
127
- Hoogelaandsters-2582-MoanMorn 1
128
- Hoogelaandsters-2586-MoanMorn 1
129
- Hoogelaandsters-2590-MoanMorn 1
130
- Hoogelaandsters-2595-MoanMorn 1
131
- Hoogelaandsters-2599-MoanMorn 1
132
- Hoogelaandsters-2603-MoanMorn 1
133
- Hoogelaandsters-2607-MoanMorn 1
134
- Hoogelaandsters-2611-MoanMorn 1
135
- Hoogelaandsters-2615-MoanMorn 1
136
- Hoogelaandsters-2619-MoanMorn 1
137
- Hoogelaandsters-2623-MoanMorn 1
138
- Hoogelaandsters-2627-MoanMorn 1
139
- Hoogelaandsters-2632-MoanMorn 1
140
- Hoogelaandsters-2636-MoanMorn 1
141
- Hoogelaandsters-2640-MoanMorn 1
142
- Hoogelaandsters-2644-MoanMorn 1
143
- Hoogelaandsters-2648-MoanMorn 1
144
- Hoogelaandsters-2652-MoanMorn 1
145
- Hoogelaandsters-2656-MoanMorn 1
146
- Hoogelaandsters-2660-MoanMorn 1
147
- Hoogelaandsters-2664-MoanMorn 1
148
- Hoogelaandsters-2668-MoanMorn 1
149
- Hoogelaandsters-2672-MoanMorn 1
150
- Hoogelaandsters-2677-MoanMorn 1
151
- Hoogelaandsters-2681-MoanMorn 1
152
- Hoogelaandsters-2685-MoanMorn 1
153
- Hoogelaandsters-2689-MoanMorn 1
154
- Hoogelaandsters-2693-MoanMorn 1
155
- Hoogelaandsters-2698-MoanMorn 1
156
- Hoogelaandsters-2702-MoanMorn 1
157
- Hoogelaandsters-2706-MoanMorn 1
158
- Hoogelaandsters-2710-MoanMorn 1
159
- Hoogelaandsters-2714-MoanMorn 1
160
- Hoogelaandsters-2718-MoanMorn 1
161
- Hoogelaandsters-2719-MoanMorn 1
162
- Hoogelaandsters-2723-MoanMorn 1
163
- Hoogelaandsters-2727-MoanMorn 1
164
- Hoogelaandsters-2731-MoanMorn 1
165
- Hoogelaandsters-2735-MoanMorn 1
166
- Hoogelaandsters-2740-MoanMorn 1
167
- Hoogelaandsters-2744-MoanMorn 1
168
- Hoogelaandsters-2748-MoanMorn 1
169
- Hoogelaandsters-2752-MoanMorn 1
170
- Hoogelaandsters-2756-MoanMorn 1
171
- Hoogelaandsters-2760-MoanMorn 1
172
- Hoogelaandsters-2765-MoanMorn 1
173
- Hoogelaandsters-2769-MoanMorn 1
174
- Hoogelaandsters-2773-MoanMorn 1
175
- Hoogelaandsters-2777-MoanMorn 1
176
- Hoogelaandsters-2781-MoanMorn 1
177
- Hoogelaandsters-2785-MoanMorn 1
178
- Hoogelaandsters-2789-MoanMorn 1
179
- Hoogelaandsters-2793-MoanMorn 1
180
- Hoogelaandsters-2797-MoanMorn 1
181
- Hoogelaandsters-2802-MoanMorn 1
182
- Hoogelaandsters-2810-MoanMorn 1
183
- Hoogelaandsters-2814-MoanMorn 1
184
- Hoogelaandsters-2720-MoanMorn 1
185
- Hoogelaandsters-2724-MoanMorn 1
186
- Hoogelaandsters-2728-MoanMorn 1
187
- Hoogelaandsters-2732-MoanMorn 1
188
- Hoogelaandsters-2736-MoanMorn 1
189
- Hoogelaandsters-2741-MoanMorn 1
190
- Hoogelaandsters-2745-MoanMorn 1
191
- Hoogelaandsters-2749-MoanMorn 1
192
- Hoogelaandsters-2753-MoanMorn 1
193
- Hoogelaandsters-2757-MoanMorn 1
194
- Hoogelaandsters-2761-MoanMorn 1
195
- Hoogelaandsters-2766-MoanMorn 1
196
- Hoogelaandsters-2770-MoanMorn 1
197
- Hoogelaandsters-2774-MoanMorn 1
198
- Hoogelaandsters-2778-MoanMorn 1
199
- Hoogelaandsters-2782-MoanMorn 1
200
- Hoogelaandsters-2786-MoanMorn 1
201
- Hoogelaandsters-2790-MoanMorn 1
202
- Hoogelaandsters-2794-MoanMorn 1
203
- Hoogelaandsters-2799-MoanMorn 1
204
- Hoogelaandsters-2807-MoanMorn 1
205
- Hoogelaandsters-2811-MoanMorn 1
206
- Hoogelaandsters-2721-MoanMorn 1
207
- Hoogelaandsters-2725-MoanMorn 1
208
- Hoogelaandsters-2729-MoanMorn 1
209
- Hoogelaandsters-2733-MoanMorn 1
210
- Hoogelaandsters-2737-MoanMorn 1
211
- Hoogelaandsters-2742-MoanMorn 1
212
- Hoogelaandsters-2746-MoanMorn 1
213
- Hoogelaandsters-2750-MoanMorn 1
214
- Hoogelaandsters-2754-MoanMorn 1
215
- Hoogelaandsters-2758-MoanMorn 1
216
- Hoogelaandsters-2762-MoanMorn 1
217
- Hoogelaandsters-2767-MoanMorn 1
218
- Hoogelaandsters-2771-MoanMorn 1
219
- Hoogelaandsters-2775-MoanMorn 1
220
- Hoogelaandsters-2779-MoanMorn 1
221
- Hoogelaandsters-2783-MoanMorn 1
222
- Hoogelaandsters-2787-MoanMorn 1
223
- Hoogelaandsters-2791-MoanMorn 1
224
- Hoogelaandsters-2795-MoanMorn 1
225
- Hoogelaandsters-2800-MoanMorn 1
226
- Hoogelaandsters-2808-MoanMorn 1
227
- Hoogelaandsters-2812-MoanMorn 1
228
- Hoogelaandsters-2722-MoanMorn 1
229
- Hoogelaandsters-2726-MoanMorn 1
230
- Hoogelaandsters-2730-MoanMorn 1
231
- Hoogelaandsters-2734-MoanMorn 1
232
- Hoogelaandsters-2738-MoanMorn 1
233
- Hoogelaandsters-2743-MoanMorn 1
234
- Hoogelaandsters-2747-MoanMorn 1
235
- Hoogelaandsters-2751-MoanMorn 1
236
- Hoogelaandsters-2755-MoanMorn 1
237
- Hoogelaandsters-2759-MoanMorn 1
238
- Hoogelaandsters-2763-MoanMorn 1
239
- Hoogelaandsters-2768-MoanMorn 1
240
- Hoogelaandsters-2772-MoanMorn 1
241
- Hoogelaandsters-2776-MoanMorn 1
242
- Hoogelaandsters-2780-MoanMorn 1
243
- Hoogelaandsters-2784-MoanMorn 1
244
- Hoogelaandsters-2788-MoanMorn 1
245
- Hoogelaandsters-2792-MoanMorn 1
246
- Hoogelaandsters-2796-MoanMorn 1
247
- Hoogelaandsters-2801-MoanMorn 1
248
- Hoogelaandsters-2809-MoanMorn 1
249
- Hoogelaandsters-2813-MoanMorn 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/speech_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-2553-MoanMorn 102968
2
- Hoogelaandsters-2557-MoanMorn 132784
3
- Hoogelaandsters-2562-MoanMorn 45382
4
- Hoogelaandsters-2567-MoanMorn 179756
5
- Hoogelaandsters-2571-MoanMorn 154171
6
- Hoogelaandsters-2575-MoanMorn 226928
7
- Hoogelaandsters-2579-MoanMorn 396289
8
- Hoogelaandsters-2583-MoanMorn 57484
9
- Hoogelaandsters-2587-MoanMorn 63091
10
- Hoogelaandsters-2592-MoanMorn 281058
11
- Hoogelaandsters-2596-MoanMorn 399175
12
- Hoogelaandsters-2600-MoanMorn 339081
13
- Hoogelaandsters-2604-MoanMorn 98893
14
- Hoogelaandsters-2608-MoanMorn 65152
15
- Hoogelaandsters-2612-MoanMorn 57739
16
- Hoogelaandsters-2616-MoanMorn 143051
17
- Hoogelaandsters-2620-MoanMorn 151777
18
- Hoogelaandsters-2624-MoanMorn 186646
19
- Hoogelaandsters-2628-MoanMorn 159049
20
- Hoogelaandsters-2633-MoanMorn 205382
21
- Hoogelaandsters-2637-MoanMorn 95216
22
- Hoogelaandsters-2641-MoanMorn 194325
23
- Hoogelaandsters-2645-MoanMorn 105721
24
- Hoogelaandsters-2649-MoanMorn 329583
25
- Hoogelaandsters-2653-MoanMorn 83826
26
- Hoogelaandsters-2657-MoanMorn 64112
27
- Hoogelaandsters-2661-MoanMorn 111091
28
- Hoogelaandsters-2665-MoanMorn 40805
29
- Hoogelaandsters-2669-MoanMorn 179279
30
- Hoogelaandsters-2673-MoanMorn 102519
31
- Hoogelaandsters-2678-MoanMorn 104510
32
- Hoogelaandsters-2682-MoanMorn 36691
33
- Hoogelaandsters-2686-MoanMorn 381632
34
- Hoogelaandsters-2690-MoanMorn 121206
35
- Hoogelaandsters-2694-MoanMorn 112389
36
- Hoogelaandsters-2699-MoanMorn 65936
37
- Hoogelaandsters-2703-MoanMorn 121981
38
- Hoogelaandsters-2707-MoanMorn 141227
39
- Hoogelaandsters-2711-MoanMorn 165911
40
- Hoogelaandsters-2715-MoanMorn 344118
41
- Hoogelaandsters-2554-MoanMorn 110765
42
- Hoogelaandsters-2558-MoanMorn 82847
43
- Hoogelaandsters-2564-MoanMorn 59270
44
- Hoogelaandsters-2568-MoanMorn 189950
45
- Hoogelaandsters-2572-MoanMorn 147342
46
- Hoogelaandsters-2576-MoanMorn 220274
47
- Hoogelaandsters-2580-MoanMorn 354925
48
- Hoogelaandsters-2584-MoanMorn 142341
49
- Hoogelaandsters-2588-MoanMorn 127062
50
- Hoogelaandsters-2593-MoanMorn 137165
51
- Hoogelaandsters-2597-MoanMorn 396984
52
- Hoogelaandsters-2601-MoanMorn 89865
53
- Hoogelaandsters-2605-MoanMorn 70560
54
- Hoogelaandsters-2609-MoanMorn 216938
55
- Hoogelaandsters-2613-MoanMorn 140629
56
- Hoogelaandsters-2617-MoanMorn 120813
57
- Hoogelaandsters-2621-MoanMorn 92039
58
- Hoogelaandsters-2625-MoanMorn 81238
59
- Hoogelaandsters-2629-MoanMorn 64944
60
- Hoogelaandsters-2634-MoanMorn 111251
61
- Hoogelaandsters-2638-MoanMorn 167512
62
- Hoogelaandsters-2642-MoanMorn 367904
63
- Hoogelaandsters-2646-MoanMorn 46324
64
- Hoogelaandsters-2650-MoanMorn 79285
65
- Hoogelaandsters-2654-MoanMorn 47784
66
- Hoogelaandsters-2658-MoanMorn 297483
67
- Hoogelaandsters-2662-MoanMorn 37814
68
- Hoogelaandsters-2666-MoanMorn 162690
69
- Hoogelaandsters-2670-MoanMorn 103191
70
- Hoogelaandsters-2674-MoanMorn 92654
71
- Hoogelaandsters-2679-MoanMorn 197560
72
- Hoogelaandsters-2683-MoanMorn 87616
73
- Hoogelaandsters-2687-MoanMorn 100706
74
- Hoogelaandsters-2691-MoanMorn 102971
75
- Hoogelaandsters-2695-MoanMorn 52497
76
- Hoogelaandsters-2700-MoanMorn 160658
77
- Hoogelaandsters-2704-MoanMorn 129668
78
- Hoogelaandsters-2708-MoanMorn 128206
79
- Hoogelaandsters-2712-MoanMorn 88044
80
- Hoogelaandsters-2716-MoanMorn 60322
81
- Hoogelaandsters-2555-MoanMorn 287540
82
- Hoogelaandsters-2559-MoanMorn 330822
83
- Hoogelaandsters-2565-MoanMorn 258697
84
- Hoogelaandsters-2569-MoanMorn 122475
85
- Hoogelaandsters-2573-MoanMorn 280673
86
- Hoogelaandsters-2577-MoanMorn 144473
87
- Hoogelaandsters-2581-MoanMorn 141996
88
- Hoogelaandsters-2585-MoanMorn 212081
89
- Hoogelaandsters-2589-MoanMorn 185519
90
- Hoogelaandsters-2594-MoanMorn 50803
91
- Hoogelaandsters-2598-MoanMorn 149919
92
- Hoogelaandsters-2602-MoanMorn 38852
93
- Hoogelaandsters-2606-MoanMorn 120520
94
- Hoogelaandsters-2610-MoanMorn 154787
95
- Hoogelaandsters-2614-MoanMorn 68877
96
- Hoogelaandsters-2618-MoanMorn 152273
97
- Hoogelaandsters-2622-MoanMorn 110079
98
- Hoogelaandsters-2626-MoanMorn 82118
99
- Hoogelaandsters-2631-MoanMorn 197675
100
- Hoogelaandsters-2635-MoanMorn 165061
101
- Hoogelaandsters-2639-MoanMorn 169140
102
- Hoogelaandsters-2643-MoanMorn 38432
103
- Hoogelaandsters-2647-MoanMorn 230497
104
- Hoogelaandsters-2651-MoanMorn 155389
105
- Hoogelaandsters-2655-MoanMorn 265827
106
- Hoogelaandsters-2659-MoanMorn 156964
107
- Hoogelaandsters-2663-MoanMorn 118006
108
- Hoogelaandsters-2667-MoanMorn 180077
109
- Hoogelaandsters-2671-MoanMorn 187349
110
- Hoogelaandsters-2675-MoanMorn 100155
111
- Hoogelaandsters-2680-MoanMorn 108959
112
- Hoogelaandsters-2684-MoanMorn 129830
113
- Hoogelaandsters-2688-MoanMorn 87494
114
- Hoogelaandsters-2692-MoanMorn 139736
115
- Hoogelaandsters-2696-MoanMorn 122070
116
- Hoogelaandsters-2701-MoanMorn 319907
117
- Hoogelaandsters-2705-MoanMorn 89592
118
- Hoogelaandsters-2709-MoanMorn 81790
119
- Hoogelaandsters-2713-MoanMorn 62093
120
- Hoogelaandsters-2717-MoanMorn 105123
121
- Hoogelaandsters-2556-MoanMorn 115423
122
- Hoogelaandsters-2560-MoanMorn 82091
123
- Hoogelaandsters-2566-MoanMorn 163000
124
- Hoogelaandsters-2570-MoanMorn 274954
125
- Hoogelaandsters-2574-MoanMorn 158271
126
- Hoogelaandsters-2578-MoanMorn 98639
127
- Hoogelaandsters-2582-MoanMorn 108427
128
- Hoogelaandsters-2586-MoanMorn 130138
129
- Hoogelaandsters-2590-MoanMorn 210595
130
- Hoogelaandsters-2595-MoanMorn 385675
131
- Hoogelaandsters-2599-MoanMorn 159944
132
- Hoogelaandsters-2603-MoanMorn 71226
133
- Hoogelaandsters-2607-MoanMorn 154605
134
- Hoogelaandsters-2611-MoanMorn 306866
135
- Hoogelaandsters-2615-MoanMorn 103504
136
- Hoogelaandsters-2619-MoanMorn 119682
137
- Hoogelaandsters-2623-MoanMorn 195466
138
- Hoogelaandsters-2627-MoanMorn 81931
139
- Hoogelaandsters-2632-MoanMorn 230483
140
- Hoogelaandsters-2636-MoanMorn 175105
141
- Hoogelaandsters-2640-MoanMorn 52143
142
- Hoogelaandsters-2644-MoanMorn 81376
143
- Hoogelaandsters-2648-MoanMorn 311615
144
- Hoogelaandsters-2652-MoanMorn 66501
145
- Hoogelaandsters-2656-MoanMorn 110732
146
- Hoogelaandsters-2660-MoanMorn 218546
147
- Hoogelaandsters-2664-MoanMorn 434104
148
- Hoogelaandsters-2668-MoanMorn 234849
149
- Hoogelaandsters-2672-MoanMorn 67330
150
- Hoogelaandsters-2677-MoanMorn 217662
151
- Hoogelaandsters-2681-MoanMorn 96112
152
- Hoogelaandsters-2685-MoanMorn 242708
153
- Hoogelaandsters-2689-MoanMorn 114105
154
- Hoogelaandsters-2693-MoanMorn 173509
155
- Hoogelaandsters-2698-MoanMorn 255138
156
- Hoogelaandsters-2702-MoanMorn 182133
157
- Hoogelaandsters-2706-MoanMorn 92569
158
- Hoogelaandsters-2710-MoanMorn 52726
159
- Hoogelaandsters-2714-MoanMorn 119379
160
- Hoogelaandsters-2718-MoanMorn 249613
161
- Hoogelaandsters-2719-MoanMorn 207030
162
- Hoogelaandsters-2723-MoanMorn 164027
163
- Hoogelaandsters-2727-MoanMorn 131255
164
- Hoogelaandsters-2731-MoanMorn 138091
165
- Hoogelaandsters-2735-MoanMorn 131698
166
- Hoogelaandsters-2740-MoanMorn 261244
167
- Hoogelaandsters-2744-MoanMorn 219641
168
- Hoogelaandsters-2748-MoanMorn 357717
169
- Hoogelaandsters-2752-MoanMorn 167116
170
- Hoogelaandsters-2756-MoanMorn 129072
171
- Hoogelaandsters-2760-MoanMorn 69985
172
- Hoogelaandsters-2765-MoanMorn 365140
173
- Hoogelaandsters-2769-MoanMorn 171180
174
- Hoogelaandsters-2773-MoanMorn 145536
175
- Hoogelaandsters-2777-MoanMorn 303805
176
- Hoogelaandsters-2781-MoanMorn 341668
177
- Hoogelaandsters-2785-MoanMorn 208293
178
- Hoogelaandsters-2789-MoanMorn 225033
179
- Hoogelaandsters-2793-MoanMorn 189487
180
- Hoogelaandsters-2797-MoanMorn 243739
181
- Hoogelaandsters-2802-MoanMorn 95392
182
- Hoogelaandsters-2810-MoanMorn 158738
183
- Hoogelaandsters-2814-MoanMorn 120438
184
- Hoogelaandsters-2720-MoanMorn 85956
185
- Hoogelaandsters-2724-MoanMorn 170079
186
- Hoogelaandsters-2728-MoanMorn 299268
187
- Hoogelaandsters-2732-MoanMorn 285525
188
- Hoogelaandsters-2736-MoanMorn 110971
189
- Hoogelaandsters-2741-MoanMorn 83711
190
- Hoogelaandsters-2745-MoanMorn 144678
191
- Hoogelaandsters-2749-MoanMorn 67738
192
- Hoogelaandsters-2753-MoanMorn 196508
193
- Hoogelaandsters-2757-MoanMorn 215266
194
- Hoogelaandsters-2761-MoanMorn 75337
195
- Hoogelaandsters-2766-MoanMorn 64602
196
- Hoogelaandsters-2770-MoanMorn 395729
197
- Hoogelaandsters-2774-MoanMorn 49597
198
- Hoogelaandsters-2778-MoanMorn 131446
199
- Hoogelaandsters-2782-MoanMorn 61127
200
- Hoogelaandsters-2786-MoanMorn 254529
201
- Hoogelaandsters-2790-MoanMorn 126016
202
- Hoogelaandsters-2794-MoanMorn 94688
203
- Hoogelaandsters-2799-MoanMorn 30315
204
- Hoogelaandsters-2807-MoanMorn 40291
205
- Hoogelaandsters-2811-MoanMorn 103372
206
- Hoogelaandsters-2721-MoanMorn 70010
207
- Hoogelaandsters-2725-MoanMorn 142302
208
- Hoogelaandsters-2729-MoanMorn 218066
209
- Hoogelaandsters-2733-MoanMorn 113371
210
- Hoogelaandsters-2737-MoanMorn 51787
211
- Hoogelaandsters-2742-MoanMorn 115021
212
- Hoogelaandsters-2746-MoanMorn 102278
213
- Hoogelaandsters-2750-MoanMorn 113960
214
- Hoogelaandsters-2754-MoanMorn 127820
215
- Hoogelaandsters-2758-MoanMorn 132424
216
- Hoogelaandsters-2762-MoanMorn 267801
217
- Hoogelaandsters-2767-MoanMorn 175120
218
- Hoogelaandsters-2771-MoanMorn 190916
219
- Hoogelaandsters-2775-MoanMorn 220794
220
- Hoogelaandsters-2779-MoanMorn 53626
221
- Hoogelaandsters-2783-MoanMorn 90153
222
- Hoogelaandsters-2787-MoanMorn 130704
223
- Hoogelaandsters-2791-MoanMorn 47366
224
- Hoogelaandsters-2795-MoanMorn 255622
225
- Hoogelaandsters-2800-MoanMorn 81850
226
- Hoogelaandsters-2808-MoanMorn 80081
227
- Hoogelaandsters-2812-MoanMorn 73333
228
- Hoogelaandsters-2722-MoanMorn 199048
229
- Hoogelaandsters-2726-MoanMorn 68327
230
- Hoogelaandsters-2730-MoanMorn 103049
231
- Hoogelaandsters-2734-MoanMorn 55949
232
- Hoogelaandsters-2738-MoanMorn 168007
233
- Hoogelaandsters-2743-MoanMorn 101916
234
- Hoogelaandsters-2747-MoanMorn 235788
235
- Hoogelaandsters-2751-MoanMorn 63234
236
- Hoogelaandsters-2755-MoanMorn 268211
237
- Hoogelaandsters-2759-MoanMorn 121221
238
- Hoogelaandsters-2763-MoanMorn 300360
239
- Hoogelaandsters-2768-MoanMorn 96182
240
- Hoogelaandsters-2772-MoanMorn 131793
241
- Hoogelaandsters-2776-MoanMorn 179261
242
- Hoogelaandsters-2780-MoanMorn 306314
243
- Hoogelaandsters-2784-MoanMorn 347307
244
- Hoogelaandsters-2788-MoanMorn 136168
245
- Hoogelaandsters-2792-MoanMorn 168299
246
- Hoogelaandsters-2796-MoanMorn 124545
247
- Hoogelaandsters-2801-MoanMorn 102298
248
- Hoogelaandsters-2809-MoanMorn 168456
249
- Hoogelaandsters-2813-MoanMorn 101606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/stats_keys DELETED
@@ -1,2 +0,0 @@
1
- feats
2
- feats_lengths
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/train/text_shape DELETED
@@ -1,249 +0,0 @@
1
- Hoogelaandsters-2553-MoanMorn 58
2
- Hoogelaandsters-2557-MoanMorn 77
3
- Hoogelaandsters-2562-MoanMorn 23
4
- Hoogelaandsters-2567-MoanMorn 106
5
- Hoogelaandsters-2571-MoanMorn 85
6
- Hoogelaandsters-2575-MoanMorn 124
7
- Hoogelaandsters-2579-MoanMorn 244
8
- Hoogelaandsters-2583-MoanMorn 34
9
- Hoogelaandsters-2587-MoanMorn 25
10
- Hoogelaandsters-2592-MoanMorn 176
11
- Hoogelaandsters-2596-MoanMorn 254
12
- Hoogelaandsters-2600-MoanMorn 230
13
- Hoogelaandsters-2604-MoanMorn 43
14
- Hoogelaandsters-2608-MoanMorn 22
15
- Hoogelaandsters-2612-MoanMorn 24
16
- Hoogelaandsters-2616-MoanMorn 80
17
- Hoogelaandsters-2620-MoanMorn 101
18
- Hoogelaandsters-2624-MoanMorn 108
19
- Hoogelaandsters-2628-MoanMorn 103
20
- Hoogelaandsters-2633-MoanMorn 147
21
- Hoogelaandsters-2637-MoanMorn 56
22
- Hoogelaandsters-2641-MoanMorn 116
23
- Hoogelaandsters-2645-MoanMorn 62
24
- Hoogelaandsters-2649-MoanMorn 200
25
- Hoogelaandsters-2653-MoanMorn 53
26
- Hoogelaandsters-2657-MoanMorn 20
27
- Hoogelaandsters-2661-MoanMorn 58
28
- Hoogelaandsters-2665-MoanMorn 27
29
- Hoogelaandsters-2669-MoanMorn 122
30
- Hoogelaandsters-2673-MoanMorn 64
31
- Hoogelaandsters-2678-MoanMorn 73
32
- Hoogelaandsters-2682-MoanMorn 20
33
- Hoogelaandsters-2686-MoanMorn 244
34
- Hoogelaandsters-2690-MoanMorn 66
35
- Hoogelaandsters-2694-MoanMorn 79
36
- Hoogelaandsters-2699-MoanMorn 38
37
- Hoogelaandsters-2703-MoanMorn 62
38
- Hoogelaandsters-2707-MoanMorn 74
39
- Hoogelaandsters-2711-MoanMorn 73
40
- Hoogelaandsters-2715-MoanMorn 193
41
- Hoogelaandsters-2554-MoanMorn 57
42
- Hoogelaandsters-2558-MoanMorn 46
43
- Hoogelaandsters-2564-MoanMorn 29
44
- Hoogelaandsters-2568-MoanMorn 107
45
- Hoogelaandsters-2572-MoanMorn 78
46
- Hoogelaandsters-2576-MoanMorn 137
47
- Hoogelaandsters-2580-MoanMorn 227
48
- Hoogelaandsters-2584-MoanMorn 84
49
- Hoogelaandsters-2588-MoanMorn 96
50
- Hoogelaandsters-2593-MoanMorn 97
51
- Hoogelaandsters-2597-MoanMorn 252
52
- Hoogelaandsters-2601-MoanMorn 51
53
- Hoogelaandsters-2605-MoanMorn 46
54
- Hoogelaandsters-2609-MoanMorn 165
55
- Hoogelaandsters-2613-MoanMorn 89
56
- Hoogelaandsters-2617-MoanMorn 66
57
- Hoogelaandsters-2621-MoanMorn 53
58
- Hoogelaandsters-2625-MoanMorn 48
59
- Hoogelaandsters-2629-MoanMorn 39
60
- Hoogelaandsters-2634-MoanMorn 65
61
- Hoogelaandsters-2638-MoanMorn 78
62
- Hoogelaandsters-2642-MoanMorn 219
63
- Hoogelaandsters-2646-MoanMorn 31
64
- Hoogelaandsters-2650-MoanMorn 23
65
- Hoogelaandsters-2654-MoanMorn 25
66
- Hoogelaandsters-2658-MoanMorn 183
67
- Hoogelaandsters-2662-MoanMorn 15
68
- Hoogelaandsters-2666-MoanMorn 82
69
- Hoogelaandsters-2670-MoanMorn 54
70
- Hoogelaandsters-2674-MoanMorn 45
71
- Hoogelaandsters-2679-MoanMorn 115
72
- Hoogelaandsters-2683-MoanMorn 48
73
- Hoogelaandsters-2687-MoanMorn 52
74
- Hoogelaandsters-2691-MoanMorn 66
75
- Hoogelaandsters-2695-MoanMorn 32
76
- Hoogelaandsters-2700-MoanMorn 90
77
- Hoogelaandsters-2704-MoanMorn 86
78
- Hoogelaandsters-2708-MoanMorn 85
79
- Hoogelaandsters-2712-MoanMorn 52
80
- Hoogelaandsters-2716-MoanMorn 34
81
- Hoogelaandsters-2555-MoanMorn 158
82
- Hoogelaandsters-2559-MoanMorn 213
83
- Hoogelaandsters-2565-MoanMorn 154
84
- Hoogelaandsters-2569-MoanMorn 70
85
- Hoogelaandsters-2573-MoanMorn 159
86
- Hoogelaandsters-2577-MoanMorn 77
87
- Hoogelaandsters-2581-MoanMorn 82
88
- Hoogelaandsters-2585-MoanMorn 123
89
- Hoogelaandsters-2589-MoanMorn 125
90
- Hoogelaandsters-2594-MoanMorn 36
91
- Hoogelaandsters-2598-MoanMorn 97
92
- Hoogelaandsters-2602-MoanMorn 15
93
- Hoogelaandsters-2606-MoanMorn 79
94
- Hoogelaandsters-2610-MoanMorn 86
95
- Hoogelaandsters-2614-MoanMorn 35
96
- Hoogelaandsters-2618-MoanMorn 98
97
- Hoogelaandsters-2622-MoanMorn 61
98
- Hoogelaandsters-2626-MoanMorn 57
99
- Hoogelaandsters-2631-MoanMorn 127
100
- Hoogelaandsters-2635-MoanMorn 114
101
- Hoogelaandsters-2639-MoanMorn 85
102
- Hoogelaandsters-2643-MoanMorn 16
103
- Hoogelaandsters-2647-MoanMorn 139
104
- Hoogelaandsters-2651-MoanMorn 87
105
- Hoogelaandsters-2655-MoanMorn 152
106
- Hoogelaandsters-2659-MoanMorn 103
107
- Hoogelaandsters-2663-MoanMorn 60
108
- Hoogelaandsters-2667-MoanMorn 128
109
- Hoogelaandsters-2671-MoanMorn 104
110
- Hoogelaandsters-2675-MoanMorn 52
111
- Hoogelaandsters-2680-MoanMorn 69
112
- Hoogelaandsters-2684-MoanMorn 94
113
- Hoogelaandsters-2688-MoanMorn 53
114
- Hoogelaandsters-2692-MoanMorn 75
115
- Hoogelaandsters-2696-MoanMorn 64
116
- Hoogelaandsters-2701-MoanMorn 184
117
- Hoogelaandsters-2705-MoanMorn 58
118
- Hoogelaandsters-2709-MoanMorn 49
119
- Hoogelaandsters-2713-MoanMorn 36
120
- Hoogelaandsters-2717-MoanMorn 60
121
- Hoogelaandsters-2556-MoanMorn 61
122
- Hoogelaandsters-2560-MoanMorn 40
123
- Hoogelaandsters-2566-MoanMorn 91
124
- Hoogelaandsters-2570-MoanMorn 138
125
- Hoogelaandsters-2574-MoanMorn 85
126
- Hoogelaandsters-2578-MoanMorn 53
127
- Hoogelaandsters-2582-MoanMorn 74
128
- Hoogelaandsters-2586-MoanMorn 67
129
- Hoogelaandsters-2590-MoanMorn 151
130
- Hoogelaandsters-2595-MoanMorn 267
131
- Hoogelaandsters-2599-MoanMorn 104
132
- Hoogelaandsters-2603-MoanMorn 39
133
- Hoogelaandsters-2607-MoanMorn 86
134
- Hoogelaandsters-2611-MoanMorn 193
135
- Hoogelaandsters-2615-MoanMorn 73
136
- Hoogelaandsters-2619-MoanMorn 78
137
- Hoogelaandsters-2623-MoanMorn 113
138
- Hoogelaandsters-2627-MoanMorn 40
139
- Hoogelaandsters-2632-MoanMorn 153
140
- Hoogelaandsters-2636-MoanMorn 123
141
- Hoogelaandsters-2640-MoanMorn 32
142
- Hoogelaandsters-2644-MoanMorn 48
143
- Hoogelaandsters-2648-MoanMorn 171
144
- Hoogelaandsters-2652-MoanMorn 27
145
- Hoogelaandsters-2656-MoanMorn 80
146
- Hoogelaandsters-2660-MoanMorn 126
147
- Hoogelaandsters-2664-MoanMorn 282
148
- Hoogelaandsters-2668-MoanMorn 143
149
- Hoogelaandsters-2672-MoanMorn 35
150
- Hoogelaandsters-2677-MoanMorn 150
151
- Hoogelaandsters-2681-MoanMorn 72
152
- Hoogelaandsters-2685-MoanMorn 142
153
- Hoogelaandsters-2689-MoanMorn 76
154
- Hoogelaandsters-2693-MoanMorn 115
155
- Hoogelaandsters-2698-MoanMorn 145
156
- Hoogelaandsters-2702-MoanMorn 108
157
- Hoogelaandsters-2706-MoanMorn 56
158
- Hoogelaandsters-2710-MoanMorn 34
159
- Hoogelaandsters-2714-MoanMorn 63
160
- Hoogelaandsters-2718-MoanMorn 164
161
- Hoogelaandsters-2719-MoanMorn 129
162
- Hoogelaandsters-2723-MoanMorn 125
163
- Hoogelaandsters-2727-MoanMorn 93
164
- Hoogelaandsters-2731-MoanMorn 89
165
- Hoogelaandsters-2735-MoanMorn 86
166
- Hoogelaandsters-2740-MoanMorn 151
167
- Hoogelaandsters-2744-MoanMorn 153
168
- Hoogelaandsters-2748-MoanMorn 197
169
- Hoogelaandsters-2752-MoanMorn 102
170
- Hoogelaandsters-2756-MoanMorn 75
171
- Hoogelaandsters-2760-MoanMorn 33
172
- Hoogelaandsters-2765-MoanMorn 257
173
- Hoogelaandsters-2769-MoanMorn 86
174
- Hoogelaandsters-2773-MoanMorn 85
175
- Hoogelaandsters-2777-MoanMorn 169
176
- Hoogelaandsters-2781-MoanMorn 244
177
- Hoogelaandsters-2785-MoanMorn 133
178
- Hoogelaandsters-2789-MoanMorn 132
179
- Hoogelaandsters-2793-MoanMorn 94
180
- Hoogelaandsters-2797-MoanMorn 151
181
- Hoogelaandsters-2802-MoanMorn 45
182
- Hoogelaandsters-2810-MoanMorn 105
183
- Hoogelaandsters-2814-MoanMorn 61
184
- Hoogelaandsters-2720-MoanMorn 54
185
- Hoogelaandsters-2724-MoanMorn 95
186
- Hoogelaandsters-2728-MoanMorn 195
187
- Hoogelaandsters-2732-MoanMorn 171
188
- Hoogelaandsters-2736-MoanMorn 64
189
- Hoogelaandsters-2741-MoanMorn 52
190
- Hoogelaandsters-2745-MoanMorn 89
191
- Hoogelaandsters-2749-MoanMorn 37
192
- Hoogelaandsters-2753-MoanMorn 108
193
- Hoogelaandsters-2757-MoanMorn 118
194
- Hoogelaandsters-2761-MoanMorn 44
195
- Hoogelaandsters-2766-MoanMorn 32
196
- Hoogelaandsters-2770-MoanMorn 252
197
- Hoogelaandsters-2774-MoanMorn 26
198
- Hoogelaandsters-2778-MoanMorn 81
199
- Hoogelaandsters-2782-MoanMorn 29
200
- Hoogelaandsters-2786-MoanMorn 144
201
- Hoogelaandsters-2790-MoanMorn 74
202
- Hoogelaandsters-2794-MoanMorn 46
203
- Hoogelaandsters-2799-MoanMorn 17
204
- Hoogelaandsters-2807-MoanMorn 24
205
- Hoogelaandsters-2811-MoanMorn 60
206
- Hoogelaandsters-2721-MoanMorn 43
207
- Hoogelaandsters-2725-MoanMorn 112
208
- Hoogelaandsters-2729-MoanMorn 143
209
- Hoogelaandsters-2733-MoanMorn 84
210
- Hoogelaandsters-2737-MoanMorn 36
211
- Hoogelaandsters-2742-MoanMorn 64
212
- Hoogelaandsters-2746-MoanMorn 65
213
- Hoogelaandsters-2750-MoanMorn 59
214
- Hoogelaandsters-2754-MoanMorn 74
215
- Hoogelaandsters-2758-MoanMorn 72
216
- Hoogelaandsters-2762-MoanMorn 160
217
- Hoogelaandsters-2767-MoanMorn 108
218
- Hoogelaandsters-2771-MoanMorn 102
219
- Hoogelaandsters-2775-MoanMorn 126
220
- Hoogelaandsters-2779-MoanMorn 35
221
- Hoogelaandsters-2783-MoanMorn 48
222
- Hoogelaandsters-2787-MoanMorn 83
223
- Hoogelaandsters-2791-MoanMorn 21
224
- Hoogelaandsters-2795-MoanMorn 178
225
- Hoogelaandsters-2800-MoanMorn 43
226
- Hoogelaandsters-2808-MoanMorn 49
227
- Hoogelaandsters-2812-MoanMorn 40
228
- Hoogelaandsters-2722-MoanMorn 123
229
- Hoogelaandsters-2726-MoanMorn 46
230
- Hoogelaandsters-2730-MoanMorn 65
231
- Hoogelaandsters-2734-MoanMorn 29
232
- Hoogelaandsters-2738-MoanMorn 102
233
- Hoogelaandsters-2743-MoanMorn 63
234
- Hoogelaandsters-2747-MoanMorn 140
235
- Hoogelaandsters-2751-MoanMorn 34
236
- Hoogelaandsters-2755-MoanMorn 170
237
- Hoogelaandsters-2759-MoanMorn 89
238
- Hoogelaandsters-2763-MoanMorn 170
239
- Hoogelaandsters-2768-MoanMorn 57
240
- Hoogelaandsters-2772-MoanMorn 82
241
- Hoogelaandsters-2776-MoanMorn 108
242
- Hoogelaandsters-2780-MoanMorn 203
243
- Hoogelaandsters-2784-MoanMorn 219
244
- Hoogelaandsters-2788-MoanMorn 86
245
- Hoogelaandsters-2792-MoanMorn 115
246
- Hoogelaandsters-2796-MoanMorn 72
247
- Hoogelaandsters-2801-MoanMorn 75
248
- Hoogelaandsters-2809-MoanMorn 82
249
- Hoogelaandsters-2813-MoanMorn 56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/batch_keys DELETED
@@ -1,3 +0,0 @@
1
- text
2
- speech
3
- sids
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/feats_lengths_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d91f08590d0800f915c76c97f826494001a888019e15e8cb5906b40df85e42f5
3
- size 778
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/feats_stats.npz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bcc2334a15eb36d64c9c834c251b257c11f2f3609d48e3b645a451272efd35f
3
- size 1402
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/sids_shape DELETED
@@ -1,5 +0,0 @@
1
- Oldambsters-0001-AigenOardegheden 1
2
- Oldambsters-0215-AigenOardegheden 1
3
- Oldambsters-0054-AigenOardegheden 1
4
- Oldambsters-0106-AigenOardegheden 1
5
- Oldambsters-0160-AigenOardegheden 1
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/speech_shape DELETED
@@ -1,5 +0,0 @@
1
- Oldambsters-0001-AigenOardegheden 115718
2
- Oldambsters-0215-AigenOardegheden 156715
3
- Oldambsters-0054-AigenOardegheden 129824
4
- Oldambsters-0106-AigenOardegheden 70560
5
- Oldambsters-0160-AigenOardegheden 50803
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/stats_keys DELETED
@@ -1,2 +0,0 @@
1
- feats
2
- feats_lengths
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.11/valid/text_shape DELETED
@@ -1,5 +0,0 @@
1
- Oldambsters-0001-AigenOardegheden 89
2
- Oldambsters-0215-AigenOardegheden 110
3
- Oldambsters-0054-AigenOardegheden 7
4
- Oldambsters-0106-AigenOardegheden 44
5
- Oldambsters-0160-AigenOardegheden 34
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12.log DELETED
@@ -1,1152 +0,0 @@
1
- # python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.12.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.12.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
2
- # Started at Fri Dec 1 15:58:34 UTC 2023
3
- #
4
- /data2/p280965/tts/espnet/tools/venv/bin/python3 /data2/p280965/tts/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type char --token_list dump/token_list/char/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/train_nodev/text,text,text --train_data_path_and_name_and_type dump/raw/train_nodev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/train_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/train_dev/wav.scp,speech,sound --train_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.12.scp --valid_shape_file exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.12.scp --output_dir exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12 --config conf/train_vits.yaml --feats_extract fbank --feats_extract_conf n_fft=1024 --feats_extract_conf hop_length=256 --feats_extract_conf win_length=null --feats_extract_conf fs=22050 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=22050 --pitch_extract_conf n_fft=1024 --pitch_extract_conf hop_length=256 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=22050 --energy_extract_conf n_fft=1024 --energy_extract_conf hop_length=256 --energy_extract_conf win_length=null --train_data_path_and_name_and_type dump/raw/train_nodev/utt2sid,sids,text_int --valid_data_path_and_name_and_type dump/raw/train_dev/utt2sid,sids,text_int --use_wandb true --wandb_project GROTTS --wandb_name VITS_lr_3.0e-4 --init_param downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv --batch_size 40 --batch_bins 10000000
5
- [wieling-3-a100] 2023-12-01 15:58:40,886 (gan_tts:293) INFO: Vocabulary size: 46
6
- [wieling-3-a100] 2023-12-01 15:58:41,003 (encoder:174) INFO: encoder self-attention layer type = relative self-attention
7
- /data2/p280965/tts/espnet/tools/venv/lib/python3.9/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
8
- warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
9
- /data2/p280965/tts/espnet/espnet2/gan_tts/vits/monotonic_align/__init__.py:19: UserWarning: Cython version is not available. Fallback to 'EXPERIMETAL' numba version. If you want to use the cython version, please build it as follows: `cd espnet2/gan_tts/vits/monotonic_align; python setup.py build_ext --inplace`
10
- warnings.warn(
11
- [wieling-3-a100] 2023-12-01 15:58:42,381 (abs_task:1268) INFO: pytorch.version=2.1.0+cu121, cuda.available=True, cudnn.version=8902, cudnn.benchmark=False, cudnn.deterministic=False
12
- [wieling-3-a100] 2023-12-01 15:58:42,453 (abs_task:1269) INFO: Model structure:
13
- ESPnetGANTTSModel(
14
- (feats_extract): LogMelFbank(
15
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
16
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=80, fmax=7600, htk=False)
17
- )
18
- (tts): VITS(
19
- (generator): VITSGenerator(
20
- (text_encoder): TextEncoder(
21
- (emb): Embedding(46, 192)
22
- (encoder): Encoder(
23
- (embed): Sequential(
24
- (0): RelPositionalEncoding(
25
- (dropout): Dropout(p=0.0, inplace=False)
26
- )
27
- )
28
- (encoders): MultiSequential(
29
- (0): EncoderLayer(
30
- (self_attn): RelPositionMultiHeadedAttention(
31
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
32
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
33
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
34
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
35
- (dropout): Dropout(p=0.1, inplace=False)
36
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
37
- )
38
- (feed_forward): MultiLayeredConv1d(
39
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
40
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
41
- (dropout): Dropout(p=0.1, inplace=False)
42
- )
43
- (feed_forward_macaron): MultiLayeredConv1d(
44
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
45
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
46
- (dropout): Dropout(p=0.1, inplace=False)
47
- )
48
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
49
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
50
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
51
- (dropout): Dropout(p=0.1, inplace=False)
52
- )
53
- (1): EncoderLayer(
54
- (self_attn): RelPositionMultiHeadedAttention(
55
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
56
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
57
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
58
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
59
- (dropout): Dropout(p=0.1, inplace=False)
60
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
61
- )
62
- (feed_forward): MultiLayeredConv1d(
63
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
64
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
65
- (dropout): Dropout(p=0.1, inplace=False)
66
- )
67
- (feed_forward_macaron): MultiLayeredConv1d(
68
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
69
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
70
- (dropout): Dropout(p=0.1, inplace=False)
71
- )
72
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
73
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
74
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
75
- (dropout): Dropout(p=0.1, inplace=False)
76
- )
77
- (2): EncoderLayer(
78
- (self_attn): RelPositionMultiHeadedAttention(
79
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
80
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
81
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
82
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
83
- (dropout): Dropout(p=0.1, inplace=False)
84
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
85
- )
86
- (feed_forward): MultiLayeredConv1d(
87
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
88
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
89
- (dropout): Dropout(p=0.1, inplace=False)
90
- )
91
- (feed_forward_macaron): MultiLayeredConv1d(
92
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
93
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
94
- (dropout): Dropout(p=0.1, inplace=False)
95
- )
96
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
97
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
98
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
99
- (dropout): Dropout(p=0.1, inplace=False)
100
- )
101
- (3): EncoderLayer(
102
- (self_attn): RelPositionMultiHeadedAttention(
103
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
104
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
105
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
106
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
107
- (dropout): Dropout(p=0.1, inplace=False)
108
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
109
- )
110
- (feed_forward): MultiLayeredConv1d(
111
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
112
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
113
- (dropout): Dropout(p=0.1, inplace=False)
114
- )
115
- (feed_forward_macaron): MultiLayeredConv1d(
116
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
117
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
118
- (dropout): Dropout(p=0.1, inplace=False)
119
- )
120
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
121
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
122
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
123
- (dropout): Dropout(p=0.1, inplace=False)
124
- )
125
- (4): EncoderLayer(
126
- (self_attn): RelPositionMultiHeadedAttention(
127
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
128
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
129
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
130
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
131
- (dropout): Dropout(p=0.1, inplace=False)
132
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
133
- )
134
- (feed_forward): MultiLayeredConv1d(
135
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
136
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
137
- (dropout): Dropout(p=0.1, inplace=False)
138
- )
139
- (feed_forward_macaron): MultiLayeredConv1d(
140
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
141
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
142
- (dropout): Dropout(p=0.1, inplace=False)
143
- )
144
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
145
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
146
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
147
- (dropout): Dropout(p=0.1, inplace=False)
148
- )
149
- (5): EncoderLayer(
150
- (self_attn): RelPositionMultiHeadedAttention(
151
- (linear_q): Linear(in_features=192, out_features=192, bias=True)
152
- (linear_k): Linear(in_features=192, out_features=192, bias=True)
153
- (linear_v): Linear(in_features=192, out_features=192, bias=True)
154
- (linear_out): Linear(in_features=192, out_features=192, bias=True)
155
- (dropout): Dropout(p=0.1, inplace=False)
156
- (linear_pos): Linear(in_features=192, out_features=192, bias=False)
157
- )
158
- (feed_forward): MultiLayeredConv1d(
159
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
160
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
161
- (dropout): Dropout(p=0.1, inplace=False)
162
- )
163
- (feed_forward_macaron): MultiLayeredConv1d(
164
- (w_1): Conv1d(192, 768, kernel_size=(3,), stride=(1,), padding=(1,))
165
- (w_2): Conv1d(768, 192, kernel_size=(3,), stride=(1,), padding=(1,))
166
- (dropout): Dropout(p=0.1, inplace=False)
167
- )
168
- (norm_ff): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
169
- (norm_mha): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
170
- (norm_ff_macaron): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
171
- (dropout): Dropout(p=0.1, inplace=False)
172
- )
173
- )
174
- (after_norm): LayerNorm((192,), eps=1e-12, elementwise_affine=True)
175
- )
176
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
177
- )
178
- (decoder): HiFiGANGenerator(
179
- (input_conv): Conv1d(192, 512, kernel_size=(7,), stride=(1,), padding=(3,))
180
- (upsamples): ModuleList(
181
- (0): Sequential(
182
- (0): LeakyReLU(negative_slope=0.1)
183
- (1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
184
- )
185
- (1): Sequential(
186
- (0): LeakyReLU(negative_slope=0.1)
187
- (1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
188
- )
189
- (2): Sequential(
190
- (0): LeakyReLU(negative_slope=0.1)
191
- (1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
192
- )
193
- (3): Sequential(
194
- (0): LeakyReLU(negative_slope=0.1)
195
- (1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
196
- )
197
- )
198
- (blocks): ModuleList(
199
- (0): ResidualBlock(
200
- (convs1): ModuleList(
201
- (0): Sequential(
202
- (0): LeakyReLU(negative_slope=0.1)
203
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
204
- )
205
- (1): Sequential(
206
- (0): LeakyReLU(negative_slope=0.1)
207
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
208
- )
209
- (2): Sequential(
210
- (0): LeakyReLU(negative_slope=0.1)
211
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
212
- )
213
- )
214
- (convs2): ModuleList(
215
- (0-2): 3 x Sequential(
216
- (0): LeakyReLU(negative_slope=0.1)
217
- (1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
218
- )
219
- )
220
- )
221
- (1): ResidualBlock(
222
- (convs1): ModuleList(
223
- (0): Sequential(
224
- (0): LeakyReLU(negative_slope=0.1)
225
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
226
- )
227
- (1): Sequential(
228
- (0): LeakyReLU(negative_slope=0.1)
229
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
230
- )
231
- (2): Sequential(
232
- (0): LeakyReLU(negative_slope=0.1)
233
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
234
- )
235
- )
236
- (convs2): ModuleList(
237
- (0-2): 3 x Sequential(
238
- (0): LeakyReLU(negative_slope=0.1)
239
- (1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
240
- )
241
- )
242
- )
243
- (2): ResidualBlock(
244
- (convs1): ModuleList(
245
- (0): Sequential(
246
- (0): LeakyReLU(negative_slope=0.1)
247
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
248
- )
249
- (1): Sequential(
250
- (0): LeakyReLU(negative_slope=0.1)
251
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
252
- )
253
- (2): Sequential(
254
- (0): LeakyReLU(negative_slope=0.1)
255
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
256
- )
257
- )
258
- (convs2): ModuleList(
259
- (0-2): 3 x Sequential(
260
- (0): LeakyReLU(negative_slope=0.1)
261
- (1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
262
- )
263
- )
264
- )
265
- (3): ResidualBlock(
266
- (convs1): ModuleList(
267
- (0): Sequential(
268
- (0): LeakyReLU(negative_slope=0.1)
269
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
270
- )
271
- (1): Sequential(
272
- (0): LeakyReLU(negative_slope=0.1)
273
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
274
- )
275
- (2): Sequential(
276
- (0): LeakyReLU(negative_slope=0.1)
277
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
278
- )
279
- )
280
- (convs2): ModuleList(
281
- (0-2): 3 x Sequential(
282
- (0): LeakyReLU(negative_slope=0.1)
283
- (1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
284
- )
285
- )
286
- )
287
- (4): ResidualBlock(
288
- (convs1): ModuleList(
289
- (0): Sequential(
290
- (0): LeakyReLU(negative_slope=0.1)
291
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
292
- )
293
- (1): Sequential(
294
- (0): LeakyReLU(negative_slope=0.1)
295
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
296
- )
297
- (2): Sequential(
298
- (0): LeakyReLU(negative_slope=0.1)
299
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
300
- )
301
- )
302
- (convs2): ModuleList(
303
- (0-2): 3 x Sequential(
304
- (0): LeakyReLU(negative_slope=0.1)
305
- (1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
306
- )
307
- )
308
- )
309
- (5): ResidualBlock(
310
- (convs1): ModuleList(
311
- (0): Sequential(
312
- (0): LeakyReLU(negative_slope=0.1)
313
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
314
- )
315
- (1): Sequential(
316
- (0): LeakyReLU(negative_slope=0.1)
317
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
318
- )
319
- (2): Sequential(
320
- (0): LeakyReLU(negative_slope=0.1)
321
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
322
- )
323
- )
324
- (convs2): ModuleList(
325
- (0-2): 3 x Sequential(
326
- (0): LeakyReLU(negative_slope=0.1)
327
- (1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
328
- )
329
- )
330
- )
331
- (6): ResidualBlock(
332
- (convs1): ModuleList(
333
- (0): Sequential(
334
- (0): LeakyReLU(negative_slope=0.1)
335
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
336
- )
337
- (1): Sequential(
338
- (0): LeakyReLU(negative_slope=0.1)
339
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
340
- )
341
- (2): Sequential(
342
- (0): LeakyReLU(negative_slope=0.1)
343
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
344
- )
345
- )
346
- (convs2): ModuleList(
347
- (0-2): 3 x Sequential(
348
- (0): LeakyReLU(negative_slope=0.1)
349
- (1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
350
- )
351
- )
352
- )
353
- (7): ResidualBlock(
354
- (convs1): ModuleList(
355
- (0): Sequential(
356
- (0): LeakyReLU(negative_slope=0.1)
357
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
358
- )
359
- (1): Sequential(
360
- (0): LeakyReLU(negative_slope=0.1)
361
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
362
- )
363
- (2): Sequential(
364
- (0): LeakyReLU(negative_slope=0.1)
365
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
366
- )
367
- )
368
- (convs2): ModuleList(
369
- (0-2): 3 x Sequential(
370
- (0): LeakyReLU(negative_slope=0.1)
371
- (1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
372
- )
373
- )
374
- )
375
- (8): ResidualBlock(
376
- (convs1): ModuleList(
377
- (0): Sequential(
378
- (0): LeakyReLU(negative_slope=0.1)
379
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
380
- )
381
- (1): Sequential(
382
- (0): LeakyReLU(negative_slope=0.1)
383
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
384
- )
385
- (2): Sequential(
386
- (0): LeakyReLU(negative_slope=0.1)
387
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
388
- )
389
- )
390
- (convs2): ModuleList(
391
- (0-2): 3 x Sequential(
392
- (0): LeakyReLU(negative_slope=0.1)
393
- (1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
394
- )
395
- )
396
- )
397
- (9): ResidualBlock(
398
- (convs1): ModuleList(
399
- (0): Sequential(
400
- (0): LeakyReLU(negative_slope=0.1)
401
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
402
- )
403
- (1): Sequential(
404
- (0): LeakyReLU(negative_slope=0.1)
405
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
406
- )
407
- (2): Sequential(
408
- (0): LeakyReLU(negative_slope=0.1)
409
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
410
- )
411
- )
412
- (convs2): ModuleList(
413
- (0-2): 3 x Sequential(
414
- (0): LeakyReLU(negative_slope=0.1)
415
- (1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
416
- )
417
- )
418
- )
419
- (10): ResidualBlock(
420
- (convs1): ModuleList(
421
- (0): Sequential(
422
- (0): LeakyReLU(negative_slope=0.1)
423
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
424
- )
425
- (1): Sequential(
426
- (0): LeakyReLU(negative_slope=0.1)
427
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
428
- )
429
- (2): Sequential(
430
- (0): LeakyReLU(negative_slope=0.1)
431
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
432
- )
433
- )
434
- (convs2): ModuleList(
435
- (0-2): 3 x Sequential(
436
- (0): LeakyReLU(negative_slope=0.1)
437
- (1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
438
- )
439
- )
440
- )
441
- (11): ResidualBlock(
442
- (convs1): ModuleList(
443
- (0): Sequential(
444
- (0): LeakyReLU(negative_slope=0.1)
445
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
446
- )
447
- (1): Sequential(
448
- (0): LeakyReLU(negative_slope=0.1)
449
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
450
- )
451
- (2): Sequential(
452
- (0): LeakyReLU(negative_slope=0.1)
453
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
454
- )
455
- )
456
- (convs2): ModuleList(
457
- (0-2): 3 x Sequential(
458
- (0): LeakyReLU(negative_slope=0.1)
459
- (1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
460
- )
461
- )
462
- )
463
- )
464
- (output_conv): Sequential(
465
- (0): LeakyReLU(negative_slope=0.01)
466
- (1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
467
- (2): Tanh()
468
- )
469
- (global_conv): Conv1d(256, 512, kernel_size=(1,), stride=(1,))
470
- )
471
- (posterior_encoder): PosteriorEncoder(
472
- (input_conv): Conv1d(80, 192, kernel_size=(1,), stride=(1,))
473
- (encoder): WaveNet(
474
- (conv_layers): ModuleList(
475
- (0-15): 16 x ResidualBlock(
476
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
477
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
478
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
479
- )
480
- )
481
- )
482
- (proj): Conv1d(192, 384, kernel_size=(1,), stride=(1,))
483
- )
484
- (flow): ResidualAffineCouplingBlock(
485
- (flows): ModuleList(
486
- (0): ResidualAffineCouplingLayer(
487
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
488
- (encoder): WaveNet(
489
- (conv_layers): ModuleList(
490
- (0-3): 4 x ResidualBlock(
491
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
492
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
493
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
494
- )
495
- )
496
- )
497
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
498
- )
499
- (1): FlipFlow()
500
- (2): ResidualAffineCouplingLayer(
501
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
502
- (encoder): WaveNet(
503
- (conv_layers): ModuleList(
504
- (0-3): 4 x ResidualBlock(
505
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
506
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
507
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
508
- )
509
- )
510
- )
511
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
512
- )
513
- (3): FlipFlow()
514
- (4): ResidualAffineCouplingLayer(
515
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
516
- (encoder): WaveNet(
517
- (conv_layers): ModuleList(
518
- (0-3): 4 x ResidualBlock(
519
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
520
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
521
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
522
- )
523
- )
524
- )
525
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
526
- )
527
- (5): FlipFlow()
528
- (6): ResidualAffineCouplingLayer(
529
- (input_conv): Conv1d(96, 192, kernel_size=(1,), stride=(1,))
530
- (encoder): WaveNet(
531
- (conv_layers): ModuleList(
532
- (0-3): 4 x ResidualBlock(
533
- (conv): Conv1d(192, 384, kernel_size=(5,), stride=(1,), padding=(2,))
534
- (conv1x1_glo): Conv1d1x1(256, 384, kernel_size=(1,), stride=(1,), bias=False)
535
- (conv1x1_out): Conv1d1x1(192, 384, kernel_size=(1,), stride=(1,))
536
- )
537
- )
538
- )
539
- (proj): Conv1d(192, 96, kernel_size=(1,), stride=(1,))
540
- )
541
- (7): FlipFlow()
542
- )
543
- )
544
- (duration_predictor): StochasticDurationPredictor(
545
- (pre): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
546
- (dds): DilatedDepthSeparableConv(
547
- (convs): ModuleList(
548
- (0): Sequential(
549
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
550
- (1): Transpose()
551
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
552
- (3): Transpose()
553
- (4): GELU(approximate='none')
554
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
555
- (6): Transpose()
556
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
557
- (8): Transpose()
558
- (9): GELU(approximate='none')
559
- (10): Dropout(p=0.5, inplace=False)
560
- )
561
- (1): Sequential(
562
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
563
- (1): Transpose()
564
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
565
- (3): Transpose()
566
- (4): GELU(approximate='none')
567
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
568
- (6): Transpose()
569
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
570
- (8): Transpose()
571
- (9): GELU(approximate='none')
572
- (10): Dropout(p=0.5, inplace=False)
573
- )
574
- (2): Sequential(
575
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
576
- (1): Transpose()
577
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
578
- (3): Transpose()
579
- (4): GELU(approximate='none')
580
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
581
- (6): Transpose()
582
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
583
- (8): Transpose()
584
- (9): GELU(approximate='none')
585
- (10): Dropout(p=0.5, inplace=False)
586
- )
587
- )
588
- )
589
- (proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
590
- (log_flow): LogFlow()
591
- (flows): ModuleList(
592
- (0): ElementwiseAffineFlow()
593
- (1): ConvFlow(
594
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
595
- (dds_conv): DilatedDepthSeparableConv(
596
- (convs): ModuleList(
597
- (0): Sequential(
598
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
599
- (1): Transpose()
600
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
601
- (3): Transpose()
602
- (4): GELU(approximate='none')
603
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
604
- (6): Transpose()
605
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
606
- (8): Transpose()
607
- (9): GELU(approximate='none')
608
- (10): Dropout(p=0.0, inplace=False)
609
- )
610
- (1): Sequential(
611
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
612
- (1): Transpose()
613
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
614
- (3): Transpose()
615
- (4): GELU(approximate='none')
616
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
617
- (6): Transpose()
618
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
619
- (8): Transpose()
620
- (9): GELU(approximate='none')
621
- (10): Dropout(p=0.0, inplace=False)
622
- )
623
- (2): Sequential(
624
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
625
- (1): Transpose()
626
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
627
- (3): Transpose()
628
- (4): GELU(approximate='none')
629
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
630
- (6): Transpose()
631
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
632
- (8): Transpose()
633
- (9): GELU(approximate='none')
634
- (10): Dropout(p=0.0, inplace=False)
635
- )
636
- )
637
- )
638
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
639
- )
640
- (2): FlipFlow()
641
- (3): ConvFlow(
642
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
643
- (dds_conv): DilatedDepthSeparableConv(
644
- (convs): ModuleList(
645
- (0): Sequential(
646
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
647
- (1): Transpose()
648
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
649
- (3): Transpose()
650
- (4): GELU(approximate='none')
651
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
652
- (6): Transpose()
653
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
654
- (8): Transpose()
655
- (9): GELU(approximate='none')
656
- (10): Dropout(p=0.0, inplace=False)
657
- )
658
- (1): Sequential(
659
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
660
- (1): Transpose()
661
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
662
- (3): Transpose()
663
- (4): GELU(approximate='none')
664
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
665
- (6): Transpose()
666
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
667
- (8): Transpose()
668
- (9): GELU(approximate='none')
669
- (10): Dropout(p=0.0, inplace=False)
670
- )
671
- (2): Sequential(
672
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
673
- (1): Transpose()
674
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
675
- (3): Transpose()
676
- (4): GELU(approximate='none')
677
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
678
- (6): Transpose()
679
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
680
- (8): Transpose()
681
- (9): GELU(approximate='none')
682
- (10): Dropout(p=0.0, inplace=False)
683
- )
684
- )
685
- )
686
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
687
- )
688
- (4): FlipFlow()
689
- (5): ConvFlow(
690
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
691
- (dds_conv): DilatedDepthSeparableConv(
692
- (convs): ModuleList(
693
- (0): Sequential(
694
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
695
- (1): Transpose()
696
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
697
- (3): Transpose()
698
- (4): GELU(approximate='none')
699
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
700
- (6): Transpose()
701
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
702
- (8): Transpose()
703
- (9): GELU(approximate='none')
704
- (10): Dropout(p=0.0, inplace=False)
705
- )
706
- (1): Sequential(
707
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
708
- (1): Transpose()
709
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
710
- (3): Transpose()
711
- (4): GELU(approximate='none')
712
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
713
- (6): Transpose()
714
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
715
- (8): Transpose()
716
- (9): GELU(approximate='none')
717
- (10): Dropout(p=0.0, inplace=False)
718
- )
719
- (2): Sequential(
720
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
721
- (1): Transpose()
722
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
723
- (3): Transpose()
724
- (4): GELU(approximate='none')
725
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
726
- (6): Transpose()
727
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
728
- (8): Transpose()
729
- (9): GELU(approximate='none')
730
- (10): Dropout(p=0.0, inplace=False)
731
- )
732
- )
733
- )
734
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
735
- )
736
- (6): FlipFlow()
737
- (7): ConvFlow(
738
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
739
- (dds_conv): DilatedDepthSeparableConv(
740
- (convs): ModuleList(
741
- (0): Sequential(
742
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
743
- (1): Transpose()
744
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
745
- (3): Transpose()
746
- (4): GELU(approximate='none')
747
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
748
- (6): Transpose()
749
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
750
- (8): Transpose()
751
- (9): GELU(approximate='none')
752
- (10): Dropout(p=0.0, inplace=False)
753
- )
754
- (1): Sequential(
755
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
756
- (1): Transpose()
757
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
758
- (3): Transpose()
759
- (4): GELU(approximate='none')
760
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
761
- (6): Transpose()
762
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
763
- (8): Transpose()
764
- (9): GELU(approximate='none')
765
- (10): Dropout(p=0.0, inplace=False)
766
- )
767
- (2): Sequential(
768
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
769
- (1): Transpose()
770
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
771
- (3): Transpose()
772
- (4): GELU(approximate='none')
773
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
774
- (6): Transpose()
775
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
776
- (8): Transpose()
777
- (9): GELU(approximate='none')
778
- (10): Dropout(p=0.0, inplace=False)
779
- )
780
- )
781
- )
782
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
783
- )
784
- (8): FlipFlow()
785
- )
786
- (post_pre): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
787
- (post_dds): DilatedDepthSeparableConv(
788
- (convs): ModuleList(
789
- (0): Sequential(
790
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
791
- (1): Transpose()
792
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
793
- (3): Transpose()
794
- (4): GELU(approximate='none')
795
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
796
- (6): Transpose()
797
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
798
- (8): Transpose()
799
- (9): GELU(approximate='none')
800
- (10): Dropout(p=0.5, inplace=False)
801
- )
802
- (1): Sequential(
803
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
804
- (1): Transpose()
805
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
806
- (3): Transpose()
807
- (4): GELU(approximate='none')
808
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
809
- (6): Transpose()
810
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
811
- (8): Transpose()
812
- (9): GELU(approximate='none')
813
- (10): Dropout(p=0.5, inplace=False)
814
- )
815
- (2): Sequential(
816
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
817
- (1): Transpose()
818
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
819
- (3): Transpose()
820
- (4): GELU(approximate='none')
821
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
822
- (6): Transpose()
823
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
824
- (8): Transpose()
825
- (9): GELU(approximate='none')
826
- (10): Dropout(p=0.5, inplace=False)
827
- )
828
- )
829
- )
830
- (post_proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
831
- (post_flows): ModuleList(
832
- (0): ElementwiseAffineFlow()
833
- (1): ConvFlow(
834
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
835
- (dds_conv): DilatedDepthSeparableConv(
836
- (convs): ModuleList(
837
- (0): Sequential(
838
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
839
- (1): Transpose()
840
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
841
- (3): Transpose()
842
- (4): GELU(approximate='none')
843
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
844
- (6): Transpose()
845
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
846
- (8): Transpose()
847
- (9): GELU(approximate='none')
848
- (10): Dropout(p=0.0, inplace=False)
849
- )
850
- (1): Sequential(
851
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
852
- (1): Transpose()
853
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
854
- (3): Transpose()
855
- (4): GELU(approximate='none')
856
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
857
- (6): Transpose()
858
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
859
- (8): Transpose()
860
- (9): GELU(approximate='none')
861
- (10): Dropout(p=0.0, inplace=False)
862
- )
863
- (2): Sequential(
864
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
865
- (1): Transpose()
866
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
867
- (3): Transpose()
868
- (4): GELU(approximate='none')
869
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
870
- (6): Transpose()
871
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
872
- (8): Transpose()
873
- (9): GELU(approximate='none')
874
- (10): Dropout(p=0.0, inplace=False)
875
- )
876
- )
877
- )
878
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
879
- )
880
- (2): FlipFlow()
881
- (3): ConvFlow(
882
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
883
- (dds_conv): DilatedDepthSeparableConv(
884
- (convs): ModuleList(
885
- (0): Sequential(
886
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
887
- (1): Transpose()
888
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
889
- (3): Transpose()
890
- (4): GELU(approximate='none')
891
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
892
- (6): Transpose()
893
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
894
- (8): Transpose()
895
- (9): GELU(approximate='none')
896
- (10): Dropout(p=0.0, inplace=False)
897
- )
898
- (1): Sequential(
899
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
900
- (1): Transpose()
901
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
902
- (3): Transpose()
903
- (4): GELU(approximate='none')
904
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
905
- (6): Transpose()
906
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
907
- (8): Transpose()
908
- (9): GELU(approximate='none')
909
- (10): Dropout(p=0.0, inplace=False)
910
- )
911
- (2): Sequential(
912
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
913
- (1): Transpose()
914
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
915
- (3): Transpose()
916
- (4): GELU(approximate='none')
917
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
918
- (6): Transpose()
919
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
920
- (8): Transpose()
921
- (9): GELU(approximate='none')
922
- (10): Dropout(p=0.0, inplace=False)
923
- )
924
- )
925
- )
926
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
927
- )
928
- (4): FlipFlow()
929
- (5): ConvFlow(
930
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
931
- (dds_conv): DilatedDepthSeparableConv(
932
- (convs): ModuleList(
933
- (0): Sequential(
934
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
935
- (1): Transpose()
936
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
937
- (3): Transpose()
938
- (4): GELU(approximate='none')
939
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
940
- (6): Transpose()
941
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
942
- (8): Transpose()
943
- (9): GELU(approximate='none')
944
- (10): Dropout(p=0.0, inplace=False)
945
- )
946
- (1): Sequential(
947
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
948
- (1): Transpose()
949
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
950
- (3): Transpose()
951
- (4): GELU(approximate='none')
952
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
953
- (6): Transpose()
954
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
955
- (8): Transpose()
956
- (9): GELU(approximate='none')
957
- (10): Dropout(p=0.0, inplace=False)
958
- )
959
- (2): Sequential(
960
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
961
- (1): Transpose()
962
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
963
- (3): Transpose()
964
- (4): GELU(approximate='none')
965
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
966
- (6): Transpose()
967
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
968
- (8): Transpose()
969
- (9): GELU(approximate='none')
970
- (10): Dropout(p=0.0, inplace=False)
971
- )
972
- )
973
- )
974
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
975
- )
976
- (6): FlipFlow()
977
- (7): ConvFlow(
978
- (input_conv): Conv1d(1, 192, kernel_size=(1,), stride=(1,))
979
- (dds_conv): DilatedDepthSeparableConv(
980
- (convs): ModuleList(
981
- (0): Sequential(
982
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(1,), groups=192)
983
- (1): Transpose()
984
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
985
- (3): Transpose()
986
- (4): GELU(approximate='none')
987
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
988
- (6): Transpose()
989
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
990
- (8): Transpose()
991
- (9): GELU(approximate='none')
992
- (10): Dropout(p=0.0, inplace=False)
993
- )
994
- (1): Sequential(
995
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,), groups=192)
996
- (1): Transpose()
997
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
998
- (3): Transpose()
999
- (4): GELU(approximate='none')
1000
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1001
- (6): Transpose()
1002
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1003
- (8): Transpose()
1004
- (9): GELU(approximate='none')
1005
- (10): Dropout(p=0.0, inplace=False)
1006
- )
1007
- (2): Sequential(
1008
- (0): Conv1d(192, 192, kernel_size=(3,), stride=(1,), padding=(9,), dilation=(9,), groups=192)
1009
- (1): Transpose()
1010
- (2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1011
- (3): Transpose()
1012
- (4): GELU(approximate='none')
1013
- (5): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
1014
- (6): Transpose()
1015
- (7): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
1016
- (8): Transpose()
1017
- (9): GELU(approximate='none')
1018
- (10): Dropout(p=0.0, inplace=False)
1019
- )
1020
- )
1021
- )
1022
- (proj): Conv1d(192, 29, kernel_size=(1,), stride=(1,))
1023
- )
1024
- (8): FlipFlow()
1025
- )
1026
- (global_conv): Conv1d(256, 192, kernel_size=(1,), stride=(1,))
1027
- )
1028
- (global_emb): Embedding(4, 256)
1029
- )
1030
- (discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
1031
- (msd): HiFiGANMultiScaleDiscriminator(
1032
- (discriminators): ModuleList(
1033
- (0): HiFiGANScaleDiscriminator(
1034
- (layers): ModuleList(
1035
- (0): Sequential(
1036
- (0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
1037
- (1): LeakyReLU(negative_slope=0.1)
1038
- )
1039
- (1): Sequential(
1040
- (0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
1041
- (1): LeakyReLU(negative_slope=0.1)
1042
- )
1043
- (2): Sequential(
1044
- (0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
1045
- (1): LeakyReLU(negative_slope=0.1)
1046
- )
1047
- (3): Sequential(
1048
- (0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1049
- (1): LeakyReLU(negative_slope=0.1)
1050
- )
1051
- (4): Sequential(
1052
- (0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
1053
- (1): LeakyReLU(negative_slope=0.1)
1054
- )
1055
- (5): Sequential(
1056
- (0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
1057
- (1): LeakyReLU(negative_slope=0.1)
1058
- )
1059
- (6): Sequential(
1060
- (0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
1061
- (1): LeakyReLU(negative_slope=0.1)
1062
- )
1063
- (7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
1064
- )
1065
- )
1066
- )
1067
- )
1068
- (mpd): HiFiGANMultiPeriodDiscriminator(
1069
- (discriminators): ModuleList(
1070
- (0-4): 5 x HiFiGANPeriodDiscriminator(
1071
- (convs): ModuleList(
1072
- (0): Sequential(
1073
- (0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1074
- (1): LeakyReLU(negative_slope=0.1)
1075
- )
1076
- (1): Sequential(
1077
- (0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1078
- (1): LeakyReLU(negative_slope=0.1)
1079
- )
1080
- (2): Sequential(
1081
- (0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1082
- (1): LeakyReLU(negative_slope=0.1)
1083
- )
1084
- (3): Sequential(
1085
- (0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
1086
- (1): LeakyReLU(negative_slope=0.1)
1087
- )
1088
- (4): Sequential(
1089
- (0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
1090
- (1): LeakyReLU(negative_slope=0.1)
1091
- )
1092
- )
1093
- (output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
1094
- )
1095
- )
1096
- )
1097
- )
1098
- (generator_adv_loss): GeneratorAdversarialLoss()
1099
- (discriminator_adv_loss): DiscriminatorAdversarialLoss()
1100
- (feat_match_loss): FeatureMatchLoss()
1101
- (mel_loss): MelSpectrogramLoss(
1102
- (wav_to_mel): LogMelFbank(
1103
- (stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
1104
- (logmel): LogMel(sr=22050, n_fft=1024, n_mels=80, fmin=0, fmax=11025.0, htk=False)
1105
- )
1106
- )
1107
- (kl_loss): KLDivergenceLoss()
1108
- )
1109
- )
1110
-
1111
- Model summary:
1112
- Class Name: ESPnetGANTTSModel
1113
- Total Number of model parameters: 96.24 M
1114
- Number of trainable parameters: 96.24 M (100.0%)
1115
- Size: 384.96 MB
1116
- Type: torch.float32
1117
- [wieling-3-a100] 2023-12-01 15:58:42,453 (abs_task:1272) INFO: Optimizer:
1118
- AdamW (
1119
- Parameter Group 0
1120
- amsgrad: False
1121
- betas: [0.8, 0.99]
1122
- capturable: False
1123
- differentiable: False
1124
- eps: 1e-09
1125
- foreach: None
1126
- fused: None
1127
- initial_lr: 0.0003
1128
- lr: 0.0003
1129
- maximize: False
1130
- weight_decay: 0.0
1131
- )
1132
- [wieling-3-a100] 2023-12-01 15:58:42,453 (abs_task:1273) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f03bc341880>
1133
- [wieling-3-a100] 2023-12-01 15:58:42,453 (abs_task:1272) INFO: Optimizer2:
1134
- AdamW (
1135
- Parameter Group 0
1136
- amsgrad: False
1137
- betas: [0.8, 0.99]
1138
- capturable: False
1139
- differentiable: False
1140
- eps: 1e-09
1141
- foreach: None
1142
- fused: None
1143
- initial_lr: 0.0003
1144
- lr: 0.0003
1145
- maximize: False
1146
- weight_decay: 0.0
1147
- )
1148
- [wieling-3-a100] 2023-12-01 15:58:42,454 (abs_task:1273) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f03bc341820>
1149
- [wieling-3-a100] 2023-12-01 15:58:42,454 (abs_task:1282) INFO: Saving the configuration in exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12/config.yaml
1150
- [wieling-3-a100] 2023-12-01 15:58:42,480 (abs_task:1293) INFO: Namespace(config='conf/train_vits.yaml', print_config=False, log_level='INFO', drop_last_iter=False, dry_run=False, iterator_type='sequence', valid_iterator_type=None, output_dir='exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12', ngpu=0, seed=67823, num_workers=4, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=True, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=False, collect_stats=True, write_collected_feats=False, max_epoch=1000, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[['train', 'total_count', 'max']], keep_nbest_models=10, nbest_averaging_interval=0, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=50, use_matplotlib=True, use_tensorboard=True, create_graph_in_tensorboard=False, use_wandb=True, wandb_project='GROTTS', wandb_id=None, wandb_entity=None, wandb_name='VITS_lr_3.0e-4', wandb_model_log_interval=-1, detect_anomaly=False, use_lora=False, save_lora_only=True, lora_conf={}, pretrain_path=None, init_param=['downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv'], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=1000, batch_size=40, valid_batch_size=None, batch_bins=10000000, valid_batch_bins=None, train_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.12.scp'], valid_shape_file=['exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.12.scp'], batch_type='numel', valid_batch_type=None, fold_length=[], sort_in_batch='descending', shuffle_within_batch=False, sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, chunk_excluded_key_prefixes=[], chunk_default_fs=None, train_data_path_and_name_and_type=[('dump/raw/train_nodev/text', 'text', 'text'), ('dump/raw/train_nodev/wav.scp', 'speech', 'sound'), ('dump/raw/train_nodev/utt2sid', 'sids', 'text_int')], valid_data_path_and_name_and_type=[('dump/raw/train_dev/text', 'text', 'text'), ('dump/raw/train_dev/wav.scp', 'speech', 'sound'), ('dump/raw/train_dev/utt2sid', 'sids', 'text_int')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, allow_multi_rates=False, valid_max_cache_size=None, exclude_weight_decay=False, exclude_weight_decay_conf={}, optim='adamw', optim_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler='exponentiallr', scheduler_conf={'gamma': 0.999875}, optim2='adamw', optim2_conf={'lr': 0.0003, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, generator_first=False, token_list=['<blank>', '<unk>', '<space>', 'e', 'n', 'a', 'o', 't', 'i', 'r', 'd', 's', 'k', 'l', 'm', 'u', 'g', 'h', 'w', 'v', '.', 'z', 'b', 'p', ',', 'j', 'c', 'f', '‘', '’', ':', '?', 'ö', "'", '!', '-', ';', 'ò', 'è', 'ì', 'é', 'y', 'ë', 'x', 'q', '<sos/eos>'], odim=None, model_conf={}, use_preprocessor=True, token_type='char', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, feats_extract='fbank', feats_extract_conf={'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'fs': 22050, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, normalize=None, normalize_conf={}, tts='vits', tts_conf={'generator_type': 'vits_generator', 'generator_params': {'hidden_channels': 192, 'spks': 4, 'global_channels': 256, 'segment_size': 32, 'text_encoder_attention_heads': 2, 'text_encoder_ffn_expand': 4, 'text_encoder_blocks': 6, 'text_encoder_positionwise_layer_type': 'conv1d', 'text_encoder_positionwise_conv_kernel_size': 3, 'text_encoder_positional_encoding_layer_type': 'rel_pos', 'text_encoder_self_attention_layer_type': 'rel_selfattn', 'text_encoder_activation_type': 'swish', 'text_encoder_normalize_before': True, 'text_encoder_dropout_rate': 0.1, 'text_encoder_positional_dropout_rate': 0.0, 'text_encoder_attention_dropout_rate': 0.1, 'use_macaron_style_in_text_encoder': True, 'use_conformer_conv_in_text_encoder': False, 'text_encoder_conformer_kernel_size': -1, 'decoder_kernel_size': 7, 'decoder_channels': 512, 'decoder_upsample_scales': [8, 8, 2, 2], 'decoder_upsample_kernel_sizes': [16, 16, 4, 4], 'decoder_resblock_kernel_sizes': [3, 7, 11], 'decoder_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'use_weight_norm_in_decoder': True, 'posterior_encoder_kernel_size': 5, 'posterior_encoder_layers': 16, 'posterior_encoder_stacks': 1, 'posterior_encoder_base_dilation': 1, 'posterior_encoder_dropout_rate': 0.0, 'use_weight_norm_in_posterior_encoder': True, 'flow_flows': 4, 'flow_kernel_size': 5, 'flow_base_dilation': 1, 'flow_layers': 4, 'flow_dropout_rate': 0.0, 'use_weight_norm_in_flow': True, 'use_only_mean_in_flow': True, 'stochastic_duration_predictor_kernel_size': 3, 'stochastic_duration_predictor_dropout_rate': 0.5, 'stochastic_duration_predictor_flows': 4, 'stochastic_duration_predictor_dds_conv_layers': 3, 'vocabs': 46, 'aux_channels': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': False, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_dur': 1.0, 'lambda_kl': 1.0, 'sampling_rate': 22050, 'cache_generator_outputs': True}, pitch_extract=None, pitch_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, energy_extract=None, energy_extract_conf={'fs': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': None}, energy_normalize=None, energy_normalize_conf={}, required=['output_dir', 'token_list'], version='202310', distributed=False)
1151
- # Accounting: time=18 threads=1
1152
- # Ended (code 0) at Fri Dec 1 15:58:52 UTC 2023, elapsed time 18 seconds
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12/config.yaml DELETED
@@ -1,383 +0,0 @@
1
- config: conf/train_vits.yaml
2
- print_config: false
3
- log_level: INFO
4
- drop_last_iter: false
5
- dry_run: false
6
- iterator_type: sequence
7
- valid_iterator_type: null
8
- output_dir: exp-vits-lr-3e-4/tts_stats_raw_char/logdir/stats.12
9
- ngpu: 0
10
- seed: 67823
11
- num_workers: 4
12
- num_att_plot: 3
13
- dist_backend: nccl
14
- dist_init_method: env://
15
- dist_world_size: null
16
- dist_rank: null
17
- local_rank: null
18
- dist_master_addr: null
19
- dist_master_port: null
20
- dist_launcher: null
21
- multiprocessing_distributed: false
22
- unused_parameters: true
23
- sharded_ddp: false
24
- cudnn_enabled: true
25
- cudnn_benchmark: false
26
- cudnn_deterministic: false
27
- collect_stats: true
28
- write_collected_feats: false
29
- max_epoch: 1000
30
- patience: null
31
- val_scheduler_criterion:
32
- - valid
33
- - loss
34
- early_stopping_criterion:
35
- - valid
36
- - loss
37
- - min
38
- best_model_criterion:
39
- - - train
40
- - total_count
41
- - max
42
- keep_nbest_models: 10
43
- nbest_averaging_interval: 0
44
- grad_clip: -1
45
- grad_clip_type: 2.0
46
- grad_noise: false
47
- accum_grad: 1
48
- no_forward_run: false
49
- resume: false
50
- train_dtype: float32
51
- use_amp: false
52
- log_interval: 50
53
- use_matplotlib: true
54
- use_tensorboard: true
55
- create_graph_in_tensorboard: false
56
- use_wandb: true
57
- wandb_project: GROTTS
58
- wandb_id: null
59
- wandb_entity: null
60
- wandb_name: VITS_lr_3.0e-4
61
- wandb_model_log_interval: -1
62
- detect_anomaly: false
63
- use_lora: false
64
- save_lora_only: true
65
- lora_conf: {}
66
- pretrain_path: null
67
- init_param:
68
- - downloads/espnet/kan-bayashi_ljspeech_vits/exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth:tts:tts:tts.generator.text_encoder,tts.generator.posterior_encoder.input_conv
69
- ignore_init_mismatch: false
70
- freeze_param: []
71
- num_iters_per_epoch: 1000
72
- batch_size: 40
73
- valid_batch_size: null
74
- batch_bins: 10000000
75
- valid_batch_bins: null
76
- train_shape_file:
77
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/train.12.scp
78
- valid_shape_file:
79
- - exp-vits-lr-3e-4/tts_stats_raw_char/logdir/valid.12.scp
80
- batch_type: numel
81
- valid_batch_type: null
82
- fold_length: []
83
- sort_in_batch: descending
84
- shuffle_within_batch: false
85
- sort_batch: descending
86
- multiple_iterator: false
87
- chunk_length: 500
88
- chunk_shift_ratio: 0.5
89
- num_cache_chunks: 1024
90
- chunk_excluded_key_prefixes: []
91
- chunk_default_fs: null
92
- train_data_path_and_name_and_type:
93
- - - dump/raw/train_nodev/text
94
- - text
95
- - text
96
- - - dump/raw/train_nodev/wav.scp
97
- - speech
98
- - sound
99
- - - dump/raw/train_nodev/utt2sid
100
- - sids
101
- - text_int
102
- valid_data_path_and_name_and_type:
103
- - - dump/raw/train_dev/text
104
- - text
105
- - text
106
- - - dump/raw/train_dev/wav.scp
107
- - speech
108
- - sound
109
- - - dump/raw/train_dev/utt2sid
110
- - sids
111
- - text_int
112
- allow_variable_data_keys: false
113
- max_cache_size: 0.0
114
- max_cache_fd: 32
115
- allow_multi_rates: false
116
- valid_max_cache_size: null
117
- exclude_weight_decay: false
118
- exclude_weight_decay_conf: {}
119
- optim: adamw
120
- optim_conf:
121
- lr: 0.0003
122
- betas:
123
- - 0.8
124
- - 0.99
125
- eps: 1.0e-09
126
- weight_decay: 0.0
127
- scheduler: exponentiallr
128
- scheduler_conf:
129
- gamma: 0.999875
130
- optim2: adamw
131
- optim2_conf:
132
- lr: 0.0003
133
- betas:
134
- - 0.8
135
- - 0.99
136
- eps: 1.0e-09
137
- weight_decay: 0.0
138
- scheduler2: exponentiallr
139
- scheduler2_conf:
140
- gamma: 0.999875
141
- generator_first: false
142
- token_list:
143
- - <blank>
144
- - <unk>
145
- - <space>
146
- - e
147
- - n
148
- - a
149
- - o
150
- - t
151
- - i
152
- - r
153
- - d
154
- - s
155
- - k
156
- - l
157
- - m
158
- - u
159
- - g
160
- - h
161
- - w
162
- - v
163
- - .
164
- - z
165
- - b
166
- - p
167
- - ','
168
- - j
169
- - c
170
- - f
171
- - ‘
172
- - ’
173
- - ':'
174
- - '?'
175
- - ö
176
- - ''''
177
- - '!'
178
- - '-'
179
- - ;
180
- - ò
181
- - è
182
- - ì
183
- - é
184
- - y
185
- - ë
186
- - x
187
- - q
188
- - <sos/eos>
189
- odim: null
190
- model_conf: {}
191
- use_preprocessor: true
192
- token_type: char
193
- bpemodel: null
194
- non_linguistic_symbols: null
195
- cleaner: null
196
- g2p: null
197
- feats_extract: fbank
198
- feats_extract_conf:
199
- n_fft: 1024
200
- hop_length: 256
201
- win_length: null
202
- fs: 22050
203
- fmin: 80
204
- fmax: 7600
205
- n_mels: 80
206
- normalize: null
207
- normalize_conf: {}
208
- tts: vits
209
- tts_conf:
210
- generator_type: vits_generator
211
- generator_params:
212
- hidden_channels: 192
213
- spks: 4
214
- global_channels: 256
215
- segment_size: 32
216
- text_encoder_attention_heads: 2
217
- text_encoder_ffn_expand: 4
218
- text_encoder_blocks: 6
219
- text_encoder_positionwise_layer_type: conv1d
220
- text_encoder_positionwise_conv_kernel_size: 3
221
- text_encoder_positional_encoding_layer_type: rel_pos
222
- text_encoder_self_attention_layer_type: rel_selfattn
223
- text_encoder_activation_type: swish
224
- text_encoder_normalize_before: true
225
- text_encoder_dropout_rate: 0.1
226
- text_encoder_positional_dropout_rate: 0.0
227
- text_encoder_attention_dropout_rate: 0.1
228
- use_macaron_style_in_text_encoder: true
229
- use_conformer_conv_in_text_encoder: false
230
- text_encoder_conformer_kernel_size: -1
231
- decoder_kernel_size: 7
232
- decoder_channels: 512
233
- decoder_upsample_scales:
234
- - 8
235
- - 8
236
- - 2
237
- - 2
238
- decoder_upsample_kernel_sizes:
239
- - 16
240
- - 16
241
- - 4
242
- - 4
243
- decoder_resblock_kernel_sizes:
244
- - 3
245
- - 7
246
- - 11
247
- decoder_resblock_dilations:
248
- - - 1
249
- - 3
250
- - 5
251
- - - 1
252
- - 3
253
- - 5
254
- - - 1
255
- - 3
256
- - 5
257
- use_weight_norm_in_decoder: true
258
- posterior_encoder_kernel_size: 5
259
- posterior_encoder_layers: 16
260
- posterior_encoder_stacks: 1
261
- posterior_encoder_base_dilation: 1
262
- posterior_encoder_dropout_rate: 0.0
263
- use_weight_norm_in_posterior_encoder: true
264
- flow_flows: 4
265
- flow_kernel_size: 5
266
- flow_base_dilation: 1
267
- flow_layers: 4
268
- flow_dropout_rate: 0.0
269
- use_weight_norm_in_flow: true
270
- use_only_mean_in_flow: true
271
- stochastic_duration_predictor_kernel_size: 3
272
- stochastic_duration_predictor_dropout_rate: 0.5
273
- stochastic_duration_predictor_flows: 4
274
- stochastic_duration_predictor_dds_conv_layers: 3
275
- vocabs: 46
276
- aux_channels: 80
277
- discriminator_type: hifigan_multi_scale_multi_period_discriminator
278
- discriminator_params:
279
- scales: 1
280
- scale_downsample_pooling: AvgPool1d
281
- scale_downsample_pooling_params:
282
- kernel_size: 4
283
- stride: 2
284
- padding: 2
285
- scale_discriminator_params:
286
- in_channels: 1
287
- out_channels: 1
288
- kernel_sizes:
289
- - 15
290
- - 41
291
- - 5
292
- - 3
293
- channels: 128
294
- max_downsample_channels: 1024
295
- max_groups: 16
296
- bias: true
297
- downsample_scales:
298
- - 2
299
- - 2
300
- - 4
301
- - 4
302
- - 1
303
- nonlinear_activation: LeakyReLU
304
- nonlinear_activation_params:
305
- negative_slope: 0.1
306
- use_weight_norm: false
307
- use_spectral_norm: false
308
- follow_official_norm: false
309
- periods:
310
- - 2
311
- - 3
312
- - 5
313
- - 7
314
- - 11
315
- period_discriminator_params:
316
- in_channels: 1
317
- out_channels: 1
318
- kernel_sizes:
319
- - 5
320
- - 3
321
- channels: 32
322
- downsample_scales:
323
- - 3
324
- - 3
325
- - 3
326
- - 3
327
- - 1
328
- max_downsample_channels: 1024
329
- bias: true
330
- nonlinear_activation: LeakyReLU
331
- nonlinear_activation_params:
332
- negative_slope: 0.1
333
- use_weight_norm: true
334
- use_spectral_norm: false
335
- generator_adv_loss_params:
336
- average_by_discriminators: false
337
- loss_type: mse
338
- discriminator_adv_loss_params:
339
- average_by_discriminators: false
340
- loss_type: mse
341
- feat_match_loss_params:
342
- average_by_discriminators: false
343
- average_by_layers: false
344
- include_final_outputs: true
345
- mel_loss_params:
346
- fs: 22050
347
- n_fft: 1024
348
- hop_length: 256
349
- win_length: null
350
- window: hann
351
- n_mels: 80
352
- fmin: 0
353
- fmax: null
354
- log_base: null
355
- lambda_adv: 1.0
356
- lambda_mel: 45.0
357
- lambda_feat_match: 2.0
358
- lambda_dur: 1.0
359
- lambda_kl: 1.0
360
- sampling_rate: 22050
361
- cache_generator_outputs: true
362
- pitch_extract: null
363
- pitch_extract_conf:
364
- fs: 22050
365
- n_fft: 1024
366
- hop_length: 256
367
- f0max: 400
368
- f0min: 80
369
- pitch_normalize: null
370
- pitch_normalize_conf: {}
371
- energy_extract: null
372
- energy_extract_conf:
373
- fs: 22050
374
- n_fft: 1024
375
- hop_length: 256
376
- win_length: null
377
- energy_normalize: null
378
- energy_normalize_conf: {}
379
- required:
380
- - output_dir
381
- - token_list
382
- version: '202310'
383
- distributed: false