taihan committed on
Commit
a745fc9
1 Parent(s): 3ee34af

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yml +116 -0
  2. model.h5 +3 -0
config.yml ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # This is the hyperparameter configuration file for Hifigan.
3
+ # Please make sure this is adjusted for the LJSpeech dataset. If you want to
4
+ # apply it to another dataset, you may need to carefully change some parameters.
5
+ # This configuration trains for 4000k iterations.
6
+
7
+ ###########################################################
8
+ # FEATURE EXTRACTION SETTING #
9
+ ###########################################################
10
+ sampling_rate: 22050 # Sampling rate of dataset.
11
+ hop_size: 256 # Hop size.
12
+ format: "npy"
13
+
14
+
15
+ ###########################################################
16
+ # GENERATOR NETWORK ARCHITECTURE SETTING #
17
+ ###########################################################
18
+ model_type: "hifigan_generator"
19
+
20
+ hifigan_generator_params:
21
+ out_channels: 1
22
+ kernel_size: 7
23
+ filters: 512
24
+ use_bias: true
25
+ upsample_scales: [8, 8, 2, 2]
26
+ stacks: 3
27
+ stack_kernel_size: [3, 7, 11]
28
+ stack_dilation_rate: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
29
+ use_final_nolinear_activation: true
30
+ is_weight_norm: false
31
+
32
+ ###########################################################
33
+ # DISCRIMINATOR NETWORK ARCHITECTURE SETTING #
34
+ ###########################################################
35
+ hifigan_discriminator_params:
36
+ out_channels: 1 # Number of output channels (number of subbands).
37
+ period_scales: [2, 3, 5, 7, 11] # List of period scales.
38
+ n_layers: 5 # Number of layers in each period discriminator.
39
+ kernel_size: 5 # Kernel size.
40
+ strides: 3 # Strides
41
+ filters: 8 # In Conv filters of each period discriminator
42
+ filter_scales: 4 # Filter scales.
43
+ max_filters: 1024 # maximum filters of period discriminator's conv.
44
+ is_weight_norm: false # Use weight-norm or not.
45
+
46
+ melgan_discriminator_params:
47
+ out_channels: 1 # Number of output channels.
48
+ scales: 3 # Number of multi-scales.
49
+ downsample_pooling: "AveragePooling1D" # Pooling type for the input downsampling.
50
+ downsample_pooling_params: # Parameters of the above pooling function.
51
+ pool_size: 4
52
+ strides: 2
53
+ kernel_sizes: [5, 3] # List of kernel size.
54
+ filters: 16 # Number of channels of the initial conv layer.
55
+ max_downsample_filters: 1024 # Maximum number of channels of downsampling layers.
56
+ downsample_scales: [4, 4, 4, 4] # List of downsampling scales.
57
+ nonlinear_activation: "LeakyReLU" # Nonlinear activation function.
58
+ nonlinear_activation_params: # Parameters of nonlinear activation function.
59
+ alpha: 0.2
60
+ is_weight_norm: false # Use weight-norm or not.
61
+
62
+ ###########################################################
63
+ # STFT LOSS SETTING #
64
+ ###########################################################
65
+ stft_loss_params:
66
+ fft_lengths: [1024, 2048, 512] # List of FFT sizes for STFT-based loss.
67
+ frame_steps: [120, 240, 50] # List of hop sizes for STFT-based loss.
68
+ frame_lengths: [600, 1200, 240] # List of window lengths for STFT-based loss.
69
+
70
+ ###########################################################
71
+ # ADVERSARIAL LOSS SETTING #
72
+ ###########################################################
73
+ lambda_feat_match: 10.0
74
+ lambda_adv: 4.0
75
+
76
+ ###########################################################
77
+ # DATA LOADER SETTING #
78
+ ###########################################################
79
+ batch_size: 16 # Batch size per GPU, assuming gradient_accumulation_steps == 1.
80
+ batch_max_steps: 8192 # Length of each audio in batch for training. Make sure it is divisible by hop_size.
81
+ batch_max_steps_valid: 81920 # Length of each audio for validation. Make sure it is divisible by hop_size.
82
+ remove_short_samples: true # Whether to remove samples whose length is less than batch_max_steps.
83
+ allow_cache: true # Whether to allow cache in dataset. If true, it requires cpu memory.
84
+ is_shuffle: true # shuffle dataset after each epoch.
85
+
86
+ ###########################################################
87
+ # OPTIMIZER & SCHEDULER SETTING #
88
+ ###########################################################
89
+ generator_optimizer_params:
90
+ lr_fn: "PiecewiseConstantDecay"
91
+ lr_params:
92
+ boundaries: [100000, 200000, 300000, 400000, 500000, 600000, 700000]
93
+ values: [0.000125, 0.000125, 0.0000625, 0.0000625, 0.0000625, 0.00003125, 0.000015625, 0.000001]
94
+ amsgrad: false
95
+
96
+ discriminator_optimizer_params:
97
+ lr_fn: "PiecewiseConstantDecay"
98
+ lr_params:
99
+ boundaries: [100000, 200000, 300000, 400000, 500000]
100
+ values: [0.00025, 0.000125, 0.0000625, 0.00003125, 0.000015625, 0.000001]
101
+ amsgrad: false
102
+
103
+ gradient_accumulation_steps: 1 # Should be an even number or 1.
104
+ ###########################################################
105
+ # INTERVAL SETTING #
106
+ ###########################################################
107
+ discriminator_train_start_steps: 100000 # Step at which discriminator training begins.
108
+ train_max_steps: 4000000 # Number of training steps.
109
+ save_interval_steps: 20000 # Interval steps to save checkpoint.
110
+ eval_interval_steps: 5000 # Interval steps to evaluate the network.
111
+ log_interval_steps: 200 # Interval steps to record the training log.
112
+
113
+ ###########################################################
114
+ # OTHER SETTING #
115
+ ###########################################################
116
+ num_save_intermediate_results: 1 # Number of batches to be saved as intermediate results.
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c567276bfff1d64653d912fddaa3846554f924d7ba4146be039ec751f19f98a1
3
+ size 55877376