Upload 3 files

Files changed (3) hide show

README.md CHANGED Viewed

@@ -1,3 +1,15 @@
----
-license: mit
----

+---
+license: mit
+datasets:
+- ryota-komatsu/libritts-r-mhubert-2000units
+language:
+- en
+base_model:
+- ryota-komatsu/fastspeech2_conformer_hifigan
+---
+This model combines a [Conditional Flow Matching-based acoustic model](https://arxiv.org/abs/2306.15687) with a [HiFi-GAN](https://arxiv.org/abs/2010.05646) vocoder.
+This is the model repository for the [speech_resynth](https://github.com/ryota-komatsu/speech_resynth) GitHub project.
+The model was trained on HuBERT units extracted from the [LibriTTS-R](https://arxiv.org/abs/2305.18802) and [EXPRESSO](https://arxiv.org/abs/2308.05725) datasets, with audio downsampled to 16 kHz.

config.json ADDED Viewed

+{
+  "architectures": [
+    "ConditionalFlowMatchingWithHifiGan"
+  ],
+  "model_config": {
+    "_attn_implementation_autoset": true,
+    "architectures": [
+      "ConditionalFlowMatchingModel"
+    ],
+    "model_type": "",
+    "torch_dtype": "float32"
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocoder_config": {
+    "_attn_implementation_autoset": true,
+    "architectures": [
+      "FastSpeech2ConformerHifiGan"
+    ],
+    "model_type": "hifigan",
+    "normalize_before": false,
+    "torch_dtype": "float32",
+    "upsample_kernel_sizes": [
+      10,
+      9,
+      8,
+      4,
+      4
+    ],
+    "upsample_rates": [
+      5,
+      4,
+      4,
+      2,
+      2
+    ]
+  }
+}

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2480f8ad80ea0454ed482f678c22e0bfc39c8f49be08e3d65f809a4aa1cc04
+size 98687020