pradnya-hf-dev committed on
Commit
d8714d0
1 Parent(s): 0dd23e2

Upload 2 files

Browse files
Files changed (2) hide show
  1. hyperparams.yaml +126 -0
  2. model.ckpt +3 -0
hyperparams.yaml ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ################################
2
+ # Model: Fastspeech2 for TTS
3
+ # Authors: Sathvik Udupa, Yingzhi Wang, Pradnya Kandarkar
4
+ # ################################
5
+ # Input parameters
6
+ lexicon:
7
+ - AA
8
+ - AE
9
+ - AH
10
+ - AO
11
+ - AW
12
+ - AY
13
+ - B
14
+ - CH
15
+ - D
16
+ - DH
17
+ - EH
18
+ - ER
19
+ - EY
20
+ - F
21
+ - G
22
+ - HH
23
+ - IH
24
+ - IY
25
+ - JH
26
+ - K
27
+ - L
28
+ - M
29
+ - N
30
+ - NG
31
+ - OW
32
+ - OY
33
+ - P
34
+ - R
35
+ - S
36
+ - SH
37
+ - T
38
+ - TH
39
+ - UH
40
+ - UW
41
+ - V
42
+ - W
43
+ - Y
44
+ - Z
45
+ - ZH
46
+ - spn
47
+
48
+ n_symbols: 41 # fixed: depends on the number of symbols in the lexicon (40), +1 for a dummy symbol used for padding
49
+ padding_idx: 0
50
+ n_mel_channels: 80
51
+
52
+ # Encoder parameters
53
+ enc_num_layers: 4
54
+ enc_num_head: 2
55
+ enc_d_model: 256
56
+ enc_ffn_dim: 1024
57
+ enc_k_dim: 256
58
+ enc_v_dim: 256
59
+ enc_dropout: 0.2
60
+
61
+ # Decoder parameters
62
+ dec_num_layers: 4
63
+ dec_num_head: 2
64
+ dec_d_model: 256
65
+ dec_ffn_dim: 1024
66
+ dec_k_dim: 256
67
+ dec_v_dim: 256
68
+ dec_dropout: 0.2
69
+
70
+ # Postnet parameters
71
+ postnet_embedding_dim: 512
72
+ postnet_kernel_size: 5
73
+ postnet_n_convolutions: 5
74
+ postnet_dropout: 0.5
75
+
76
+ # Common
77
+ normalize_before: True
78
+ ffn_type: 1dcnn #1dcnn or ffn
79
+ ffn_cnn_kernel_size_list: [9, 1]
80
+
81
+ # Variance predictor
82
+ dur_pred_kernel_size: 3
83
+ pitch_pred_kernel_size: 3
84
+ energy_pred_kernel_size: 3
85
+ variance_predictor_dropout: 0.5
86
+
87
+ # Model
88
+ model: !new:speechbrain.lobes.models.FastSpeech2.FastSpeech2
89
+ enc_num_layers: !ref <enc_num_layers>
90
+ enc_num_head: !ref <enc_num_head>
91
+ enc_d_model: !ref <enc_d_model>
92
+ enc_ffn_dim: !ref <enc_ffn_dim>
93
+ enc_k_dim: !ref <enc_k_dim>
94
+ enc_v_dim: !ref <enc_v_dim>
95
+ enc_dropout: !ref <enc_dropout>
96
+ dec_num_layers: !ref <dec_num_layers>
97
+ dec_num_head: !ref <dec_num_head>
98
+ dec_d_model: !ref <dec_d_model>
99
+ dec_ffn_dim: !ref <dec_ffn_dim>
100
+ dec_k_dim: !ref <dec_k_dim>
101
+ dec_v_dim: !ref <dec_v_dim>
102
+ dec_dropout: !ref <dec_dropout>
103
+ normalize_before: !ref <normalize_before>
104
+ ffn_type: !ref <ffn_type>
105
+ ffn_cnn_kernel_size_list: !ref <ffn_cnn_kernel_size_list>
106
+ n_char: !ref <n_symbols>
107
+ n_mels: !ref <n_mel_channels>
108
+ postnet_embedding_dim: !ref <postnet_embedding_dim>
109
+ postnet_kernel_size: !ref <postnet_kernel_size>
110
+ postnet_n_convolutions: !ref <postnet_n_convolutions>
111
+ postnet_dropout: !ref <postnet_dropout>
112
+ padding_idx: !ref <padding_idx>
113
+ dur_pred_kernel_size: !ref <dur_pred_kernel_size>
114
+ pitch_pred_kernel_size: !ref <pitch_pred_kernel_size>
115
+ energy_pred_kernel_size: !ref <energy_pred_kernel_size>
116
+ variance_predictor_dropout: !ref <variance_predictor_dropout>
117
+
118
+
119
+ input_encoder: !new:speechbrain.dataio.encoder.TextEncoder
120
+
121
+ modules:
122
+ model: !ref <model>
123
+
124
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
125
+ loadables:
126
+ model: !ref <model>
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfc40f7ad123936bb18f56d6f2392198c09b50eb416fdfb0895ec2077f4ed6cc
3
+ size 114702155