GunnarThor committed on
Commit
c885237
1 Parent(s): aa1d68c

add model files

Browse files
Files changed (4) hide show
  1. README.md +9 -0
  2. checkpoint-1500000steps.pkl +3 -0
  3. config.yml +167 -0
  4. stats.h5 +3 -0
README.md CHANGED
@@ -1,3 +1,12 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ This model was trained by Gunnar Thor Örnólfsson in 2023.
6
+
7
+ It uses a combination of the Talrómur 1 and 2 corpora, totalling 44 speakers (with a further 4 speakers held back for evaluation).
8
+
9
+ All 8 voices in Talrómur 1 were used.
10
+ 36 out of the 40 voices in Talrómur 2 were used.
11
+
12
+ The model was trained for 1,500,000 steps using Tomoki Hayashi's implementation of StyleMelGAN: https://github.com/kan-bayashi/ParallelWaveGAN/
checkpoint-1500000steps.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb6b455b89a754472d208e2e33d8536ed426de6c597ae48f27ed91de98425a4
3
+ size 114014636
config.yml ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ allow_cache: false
2
+ batch_max_steps: 22528
3
+ batch_size: 32
4
+ config: conf/style_melgan.v1.yaml
5
+ dev_dumpdir: dump/dev_1and2/norm
6
+ dev_feats_scp: null
7
+ dev_segments: null
8
+ dev_wav_scp: null
9
+ discriminator_adv_loss_params:
10
+ average_by_discriminators: false
11
+ discriminator_grad_norm: -1
12
+ discriminator_optimizer_params:
13
+ betas:
14
+ - 0.5
15
+ - 0.9
16
+ lr: 0.0002
17
+ weight_decay: 0.0
18
+ discriminator_optimizer_type: Adam
19
+ discriminator_params:
20
+ discriminator_params:
21
+ bias: true
22
+ channels: 16
23
+ downsample_scales:
24
+ - 4
25
+ - 4
26
+ - 4
27
+ - 1
28
+ kernel_sizes:
29
+ - 5
30
+ - 3
31
+ max_downsample_channels: 512
32
+ nonlinear_activation: LeakyReLU
33
+ nonlinear_activation_params:
34
+ negative_slope: 0.2
35
+ out_channels: 1
36
+ pqmf_params:
37
+ - - 1
38
+ - None
39
+ - None
40
+ - None
41
+ - - 2
42
+ - 62
43
+ - 0.267
44
+ - 9.0
45
+ - - 4
46
+ - 62
47
+ - 0.142
48
+ - 9.0
49
+ - - 8
50
+ - 62
51
+ - 0.07949
52
+ - 9.0
53
+ repeats: 4
54
+ use_weight_norm: true
55
+ window_sizes:
56
+ - 512
57
+ - 1024
58
+ - 2048
59
+ - 4096
60
+ discriminator_scheduler_params:
61
+ gamma: 0.5
62
+ milestones:
63
+ - 200000
64
+ - 400000
65
+ - 600000
66
+ - 800000
67
+ discriminator_scheduler_type: MultiStepLR
68
+ discriminator_train_start_steps: 100000
69
+ discriminator_type: StyleMelGANDiscriminator
70
+ distributed: false
71
+ eval_interval_steps: 1000
72
+ fft_size: 1024
73
+ fmax: 7600
74
+ fmin: 80
75
+ format: hdf5
76
+ generator_adv_loss_params:
77
+ average_by_discriminators: false
78
+ generator_grad_norm: -1
79
+ generator_optimizer_params:
80
+ betas:
81
+ - 0.5
82
+ - 0.9
83
+ lr: 0.0001
84
+ weight_decay: 0.0
85
+ generator_optimizer_type: Adam
86
+ generator_params:
87
+ aux_channels: 80
88
+ bias: true
89
+ channels: 64
90
+ dilation: 2
91
+ gated_function: softmax
92
+ in_channels: 128
93
+ kernel_size: 9
94
+ noise_upsample_activation: LeakyReLU
95
+ noise_upsample_activation_params:
96
+ negative_slope: 0.2
97
+ noise_upsample_scales:
98
+ - 11
99
+ - 2
100
+ - 2
101
+ - 2
102
+ out_channels: 1
103
+ upsample_mode: nearest
104
+ upsample_scales:
105
+ - 2
106
+ - 2
107
+ - 2
108
+ - 2
109
+ - 2
110
+ - 2
111
+ - 2
112
+ - 2
113
+ - 1
114
+ use_weight_norm: true
115
+ generator_scheduler_params:
116
+ gamma: 0.5
117
+ milestones:
118
+ - 100000
119
+ - 300000
120
+ - 500000
121
+ - 700000
122
+ - 900000
123
+ generator_scheduler_type: MultiStepLR
124
+ generator_type: StyleMelGANGenerator
125
+ global_gain_scale: 1.0
126
+ hop_size: 256
127
+ lambda_adv: 1.0
128
+ lambda_aux: 1.0
129
+ log_interval_steps: 100
130
+ num_mels: 80
131
+ num_save_intermediate_results: 4
132
+ num_workers: 2
133
+ outdir: exp/train_1and2_style_melgan.v1
134
+ pin_memory: true
135
+ pretrain: ''
136
+ rank: 0
137
+ remove_short_samples: false
138
+ resume: ''
139
+ sampling_rate: 22050
140
+ save_interval_steps: 50000
141
+ stft_loss_params:
142
+ fft_sizes:
143
+ - 1024
144
+ - 2048
145
+ - 512
146
+ hop_sizes:
147
+ - 120
148
+ - 240
149
+ - 50
150
+ win_lengths:
151
+ - 600
152
+ - 1200
153
+ - 240
154
+ window: hann_window
155
+ train_dumpdir: dump/train_1and2/norm
156
+ train_feats_scp: null
157
+ train_max_steps: 1500000
158
+ train_segments: null
159
+ train_wav_scp: null
160
+ trim_frame_size: 1024
161
+ trim_hop_size: 256
162
+ trim_silence: false
163
+ trim_threshold_in_db: 60
164
+ verbose: 1
165
+ version: 0.6.0
166
+ win_length: null
167
+ window: hann
stats.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f1528f410ec426951fc6e30ec4cc266805f8477599c6ad2a26f07e2797bb71
3
+ size 4736