{
  "architectures": [
    "ConditionalFlowMatchingWithHifiGan"
  ],
  "model_config": {
    "_attn_implementation_autoset": true,
    "architectures": [
      "ConditionalFlowMatchingModel"
    ],
    "model_type": "",
    "torch_dtype": "float32"
  },
  "torch_dtype": "float32",
  "transformers_version": "4.49.0",
  "vocoder_config": {
    "_attn_implementation_autoset": true,
    "architectures": [
      "FastSpeech2ConformerHifiGan"
    ],
    "model_type": "hifigan",
    "normalize_before": false,
    "torch_dtype": "float32",
    "upsample_kernel_sizes": [
      10,
      9,
      8,
      4,
      4
    ],
    "upsample_rates": [
      5,
      4,
      4,
      2,
      2
    ]
  }
}