sep moods
Browse files- README.md +11 -1
- config.yaml +33 -0
- gcmvn_stats.npz +0 -0
- hifigan.bin +3 -0
- hifigan.json +37 -0
- pytorch_model.pt +3 -0
- speakers.txt +4 -0
- vocab.txt +70 -0
README.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1 |
---
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
1 |
---
|
2 |
+
library_name: fairseq
|
3 |
+
task: text-to-speech
|
4 |
+
tags:
|
5 |
+
- fairseq
|
6 |
+
- audio
|
7 |
+
- text-to-speech
|
8 |
+
language: en
|
9 |
+
widget:
|
10 |
+
- text: "Hello, this is a test run."
|
11 |
+
example_title: "Hello, this is a test run."
|
12 |
---
|
13 |
+
# fastspeech2-mf4
|
config.yaml
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio_root: ./
|
2 |
+
features:
|
3 |
+
energy_max: 5.850490093231201
|
4 |
+
energy_min: 0.0
|
5 |
+
eps: 1.0e-05
|
6 |
+
f_max: 8000
|
7 |
+
f_min: 0
|
8 |
+
hop_len_t: 0.011609977324263039
|
9 |
+
hop_length: 256
|
10 |
+
n_fft: 1024
|
11 |
+
n_mels: 80
|
12 |
+
n_stft: 513
|
13 |
+
pitch_max: 6.524898211542482
|
14 |
+
pitch_min: 1.0e-08
|
15 |
+
sample_rate: 22050
|
16 |
+
type: spectrogram+melscale+log
|
17 |
+
win_len_t: 0.046439909297052155
|
18 |
+
win_length: 1024
|
19 |
+
window_fn: hann
|
20 |
+
global_cmvn:
|
21 |
+
stats_npz_path: gcmvn_stats.npz
|
22 |
+
sample_rate: 22050
|
23 |
+
transforms:
|
24 |
+
'*':
|
25 |
+
- global_cmvn
|
26 |
+
vocab_filename: vocab.txt
|
27 |
+
speaker_set_filename: speakers.txt
|
28 |
+
vocoder:
|
29 |
+
type: hifigan
|
30 |
+
config: hifigan.json
|
31 |
+
checkpoint: hifigan.bin
|
32 |
+
hub:
|
33 |
+
phonemizer: g2p
|
gcmvn_stats.npz
ADDED
Binary file (1.14 kB). View file
|
|
hifigan.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc43f2a7cff67b91696c67cb577391be2ed64cd9bc10aedcc9e08bb7c0b03e44
|
3 |
+
size 55819885
|
hifigan.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"resblock": "1",
|
3 |
+
"num_gpus": 0,
|
4 |
+
"batch_size": 16,
|
5 |
+
"learning_rate": 0.0002,
|
6 |
+
"adam_b1": 0.8,
|
7 |
+
"adam_b2": 0.99,
|
8 |
+
"lr_decay": 0.999,
|
9 |
+
"seed": 1234,
|
10 |
+
|
11 |
+
"upsample_rates": [8,8,2,2],
|
12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
13 |
+
"upsample_initial_channel": 512,
|
14 |
+
"resblock_kernel_sizes": [3,7,11],
|
15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
16 |
+
|
17 |
+
"segment_size": 8192,
|
18 |
+
"num_mels": 80,
|
19 |
+
"num_freq": 1025,
|
20 |
+
"n_fft": 1024,
|
21 |
+
"hop_size": 256,
|
22 |
+
"win_size": 1024,
|
23 |
+
|
24 |
+
"sampling_rate": 22050,
|
25 |
+
|
26 |
+
"fmin": 0,
|
27 |
+
"fmax": 8000,
|
28 |
+
"fmax_for_loss": null,
|
29 |
+
|
30 |
+
"num_workers": 4,
|
31 |
+
|
32 |
+
"dist_config": {
|
33 |
+
"dist_backend": "nccl",
|
34 |
+
"dist_url": "tcp://localhost:54321",
|
35 |
+
"world_size": 1
|
36 |
+
}
|
37 |
+
}
|
pytorch_model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fea0fd38660ca493154d720cbf2a111b5a7ba135e59da4c4f5d44841333b604a
|
3 |
+
size 387441213
|
speakers.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Freeman angry
|
2 |
+
Freeman happy
|
3 |
+
Freeman narration
|
4 |
+
Freeman normal
|
vocab.txt
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
N 333705
|
2 |
+
T 328991
|
3 |
+
AH0 293550
|
4 |
+
S 266464
|
5 |
+
R 228299
|
6 |
+
L 206080
|
7 |
+
D 205594
|
8 |
+
IH0 182689
|
9 |
+
K 164159
|
10 |
+
sp 142640
|
11 |
+
M 138027
|
12 |
+
Z 122502
|
13 |
+
IH1 116897
|
14 |
+
AA1 113508
|
15 |
+
TH 113159
|
16 |
+
F 112260
|
17 |
+
EH1 104541
|
18 |
+
P 101009
|
19 |
+
W 93292
|
20 |
+
ER0 89396
|
21 |
+
B 87421
|
22 |
+
IY0 83572
|
23 |
+
AE1 80417
|
24 |
+
HH 77016
|
25 |
+
EY1 76588
|
26 |
+
AO1 75558
|
27 |
+
AY1 70751
|
28 |
+
AH1 63903
|
29 |
+
V 58163
|
30 |
+
UW1 54663
|
31 |
+
IY1 51596
|
32 |
+
NG 49090
|
33 |
+
OW0 47402
|
34 |
+
G 44006
|
35 |
+
Y 41567
|
36 |
+
SH 41278
|
37 |
+
DH 40317
|
38 |
+
OW1 37952
|
39 |
+
AE2 36196
|
40 |
+
ER1 30813
|
41 |
+
JH 29213
|
42 |
+
CH 24021
|
43 |
+
AW1 20297
|
44 |
+
UH1 18645
|
45 |
+
EH2 12445
|
46 |
+
EY2 10405
|
47 |
+
IH2 8282
|
48 |
+
OW2 7128
|
49 |
+
AY2 7050
|
50 |
+
UW0 6859
|
51 |
+
AA2 5578
|
52 |
+
OY1 5382
|
53 |
+
AA0 5067
|
54 |
+
AO2 4570
|
55 |
+
AW2 4526
|
56 |
+
AO0 3906
|
57 |
+
EH0 3684
|
58 |
+
AE0 3635
|
59 |
+
IY2 3150
|
60 |
+
ZH 3126
|
61 |
+
UW2 3107
|
62 |
+
AY0 2345
|
63 |
+
AH2 2238
|
64 |
+
UH2 1020
|
65 |
+
ER2 973
|
66 |
+
EY0 816
|
67 |
+
AW0 558
|
68 |
+
UH0 295
|
69 |
+
OY2 266
|
70 |
+
OY0 84
|