Safetensors
English
ryota-komatsu committed on
Commit
bb97029
·
verified ·
1 Parent(s): 29e67c2

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +15 -3
  2. config.json +38 -0
  3. model.safetensors +3 -0
README.md CHANGED
@@ -1,3 +1,15 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ datasets:
4
+ - ryota-komatsu/libritts-r-mhubert-2000units
5
+ language:
6
+ - en
7
+ base_model:
8
+ - ryota-komatsu/fastspeech2_conformer_hifigan
9
+ ---
10
+
11
+ [Conditional Flow Matching-based acoustic model](https://arxiv.org/abs/2306.15687) with a [HiFi-GAN](https://arxiv.org/abs/2010.05646) vocoder.
12
+
13
+ This is the model repository for the [speech_resynth](https://github.com/ryota-komatsu/speech_resynth) GitHub project.
14
+
15
+ The model was trained on HuBERT units of [LibriTTS-R](https://arxiv.org/abs/2305.18802) and [EXPRESSO](https://arxiv.org/abs/2308.05725) audio downsampled to 16 kHz.
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ConditionalFlowMatchingWithHifiGan"
4
+ ],
5
+ "model_config": {
6
+ "_attn_implementation_autoset": true,
7
+ "architectures": [
8
+ "ConditionalFlowMatchingModel"
9
+ ],
10
+ "model_type": "",
11
+ "torch_dtype": "float32"
12
+ },
13
+ "torch_dtype": "float32",
14
+ "transformers_version": "4.49.0",
15
+ "vocoder_config": {
16
+ "_attn_implementation_autoset": true,
17
+ "architectures": [
18
+ "FastSpeech2ConformerHifiGan"
19
+ ],
20
+ "model_type": "hifigan",
21
+ "normalize_before": false,
22
+ "torch_dtype": "float32",
23
+ "upsample_kernel_sizes": [
24
+ 10,
25
+ 9,
26
+ 8,
27
+ 4,
28
+ 4
29
+ ],
30
+ "upsample_rates": [
31
+ 5,
32
+ 4,
33
+ 4,
34
+ 2,
35
+ 2
36
+ ]
37
+ }
38
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c2480f8ad80ea0454ed482f678c22e0bfc39c8f49be08e3d65f809a4aa1cc04
3
+ size 98687020