mrfakename committed on
Commit
9640c47
1 Parent(s): 9df0fee

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Vocoder/LibriTTS/g_00935000 filter=lfs diff=lfs merge=lfs -text
Models/VCTK/config.yml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "Models/VCTK"
2
+ first_stage_path: "first_stage.pth"
3
+ save_freq: 2
4
+ log_interval: 10
5
+ device: "cuda"
6
+ multigpu: false
7
+ epochs_1st: 150 # number of epochs for first stage training
8
+ epochs_2nd: 100 # number of epochs for second stage training
9
+ batch_size: 32
10
+ pretrained_model: ""
11
+ second_stage_load_pretrained: false # set to true if the pre-trained model is for 2nd stage
12
+ load_only_params: false # set to true if do not want to load epoch numbers and optimizer parameters
13
+
14
+ train_data: "Data/train_list.txt"
15
+ val_data: "Data/val_list.txt"
16
+
17
+ F0_path: "Utils/JDC/bst.t7"
18
+ ASR_config: "Utils/ASR/config.yml"
19
+ ASR_path: "Utils/ASR/epoch_00080.pth"
20
+
21
+ preprocess_params:
22
+ sr: 24000
23
+ spect_params:
24
+ n_fft: 2048
25
+ win_length: 1200
26
+ hop_length: 300
27
+
28
+ model_params:
29
+ hidden_dim: 512
30
+ n_token: 178
31
+ style_dim: 128
32
+ n_layer: 3
33
+ dim_in: 64
34
+ max_conv_dim: 512
35
+ n_mels: 80
36
+ dropout: 0.2
37
+ n_domain: 108
38
+
39
+ loss_params:
40
+ lambda_mel: 5. # mel reconstruction loss (1st & 2nd stage)
41
+ lambda_adv: 1. # adversarial loss (1st & 2nd stage)
42
+ lambda_reg: 1. # adversarial regularization loss (1st & 2nd stage)
43
+ lambda_fm: 0.1 # feature matching loss (1st & 2nd stage)
44
+
45
+ lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
46
+ lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
47
+ lambda_sty: 1. # style reconstruction loss (1st stage)
48
+
49
+ TMA_epoch: 10 # TMA starting epoch (1st stage)
50
+ VC_epoch: -5 # VC starting epoch (1st stage), only fine-tune for VC for 5 epochs
51
+ TMA_CEloss: false # see https://github.com/yl4579/StyleTTS/issues/7
52
+
53
+ lambda_feat: 1. # feature reconstruction loss (1st stage)
54
+ lambda_pim: 1. # phoneme information maximization loss (2nd stage)
55
+ lambda_cyc: 1. # cycle consistency loss (2nd stage)
56
+
57
+
58
+ optimizer_params:
59
+ lr: 0.0001
Models/VCTK/epoch_2nd_00100.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841dd6a36943d7bc6c127b9f1857db920eef59da5658108cb5751dac94143457
3
+ size 335469173
Vocoder/LibriTTS/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 0,
4
+ "batch_size": 16,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+ "freeze_level": 2,
11
+
12
+ "upsample_rates": [10,5,3,2],
13
+ "upsample_kernel_sizes": [20,10,6,4],
14
+ "upsample_initial_channel": 512,
15
+ "resblock_kernel_sizes": [3,7,11],
16
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
17
+
18
+ "segment_size": 57600,
19
+ "num_mels": 80,
20
+ "num_freq": 1025,
21
+ "n_fft": 2048,
22
+ "hop_size": 300,
23
+ "win_size": 1200,
24
+
25
+ "sampling_rate": 24000,
26
+
27
+ "fmin": 0,
28
+ "fmax": 8000,
29
+ "fmax_for_loss": null,
30
+
31
+ "num_workers": 4,
32
+
33
+ "dist_config": {
34
+ "dist_backend": "nccl",
35
+ "dist_url": "tcp://localhost:54321",
36
+ "world_size": 1
37
+ }
38
+ }
Vocoder/LibriTTS/g_00935000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:349789d9c2ed411b564223093d3fe54de09be0ded8d19a16e011e828d547c4a3
3
+ size 57205417