{
  "architectures": [
    "SpeechT5HifiGan"
  ],
  "initializer_range": 0.01,
  "leaky_relu_slope": 0.1,
  "model_in_dim": 64,
  "model_type": "hifigan",
  "normalize_before": false,
  "resblock_dilation_sizes": [
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ],
    [
      1,
      3,
      5
    ]
  ],
  "resblock_kernel_sizes": [
    3,
    7,
    11
  ],
  "sampling_rate": 16000,
  "torch_dtype": "float32",
  "transformers_version": "4.28.0.dev0",
  "upsample_initial_channel": 1024,
  "upsample_kernel_sizes": [
    16,
    16,
    8,
    4,
    4
  ],
  "upsample_rates": [
    5,
    4,
    2,
    2,
    2
  ]
}