{ "architectures": [ "ConditionalFlowMatchingWithHifiGan" ], "model_config": { "_attn_implementation_autoset": true, "architectures": [ "ConditionalFlowMatchingModel" ], "model_type": "", "torch_dtype": "float32" }, "torch_dtype": "float32", "transformers_version": "4.49.0", "vocoder_config": { "_attn_implementation_autoset": true, "architectures": [ "FastSpeech2ConformerHifiGan" ], "model_type": "hifigan", "normalize_before": false, "torch_dtype": "float32", "upsample_kernel_sizes": [ 10, 9, 8, 4, 4 ], "upsample_rates": [ 5, 4, 4, 2, 2 ] } }