novateur committed
Commit 3e73800 · verified · 1 Parent(s): 3188c79

Upload jsp_wavtokenizer_dit_stage2_vocoder_noreduce_center_mel100_node3.yaml

jsp_wavtokenizer_dit_stage2_vocoder_noreduce_center_mel100_node3.yaml ADDED
seed_everything: 3407

data:
  class_path: decoder.dataset.VocosDataModule
  init_args:
    train_params:
      filelist_path: /cpfs01/shared/jishengpeng/Data/DataPath/libritts_train_dev_test
      sampling_rate: 24000
      num_samples: 144000
      batch_size: 22 # 20
      num_workers: 8
      n_mels: 100

    val_params:
      filelist_path: /cpfs01/shared/jishengpeng/Data/DataPath/libritts_testclean
      sampling_rate: 24000
      num_samples: 144000
      batch_size: 2 # 10
      num_workers: 8
      n_mels: 100

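# With sampling_rate 24000, num_samples 144000 corresponds to 6-second training
# clips; under DDP across 8 GPUs x 3 nodes (trainer section below), the
# effective global batch size is presumably 22 * 8 * 3 = 528.
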
model:
  class_path: decoder.experiment.WavTokenizer
  init_args:
    sample_rate: 24000
    train_vocoder: true
    reduce_length: false
    initial_learning_rate: 2e-4
    mel_loss_coeff: 360
    mrd_loss_coeff: 1.0
    num_warmup_steps: 0 # optimizer warmup steps
    pretrain_mel_steps: 0 # 0 means the GAN objective is used from the first iteration

    # automatic evaluation
    evaluate_utmos: true
    evaluate_pesq: true
    evaluate_periodicty: true

    resume: false
    resume_config: ./WavTokenizer/configs/wavtokenizer_smalldata_frame75_3s_nq1_code16384_dim512_kmeans800_attn.yaml
    resume_model: /home/xj_data/jishengpeng/Qwen_Codec/WavTokenizer_fm_dit/result/train/dit_stage2_trainvocoder_noreduce_center/lightning_logs/version_4/checkpoints/wavtokenizer_checkpoint_epoch=1_step=96596_val_loss=0.1223.ckpt

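    # Since resume is false, the resume_config / resume_model paths above are
    # presumably ignored at startup.
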
    feature_extractor:
      class_path: decoder.feature_extractors.EncodecFeatures
      init_args:
        encodec_model: encodec_24khz
        bandwidths: [6.6, 6.6, 6.6, 6.6]
        train_codebooks: true
        num_quantizers: 1
        dowmsamples: [8, 5, 4, 2]
        vq_bins: 4096
        vq_kmeans: 200

    backbone:
      class_path: decoder.models.WavTokenizerDiT
      init_args:
        train_vocoder: true
        reduce_length: false
        input_channels: 100
        dim: 768
        intermediate_dim: 1536
        num_layers: 12
        adanorm_num_embeddings: 4

    head:
      class_path: decoder.heads.ISTFTHead
      init_args:
        dim: 768
        n_fft: 1280
        hop_length: 320
        padding: center

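    # With hop_length 320 at 24 kHz, the ISTFT head produces 24000 / 320 = 75
    # frames per second (consistent with the "frame75" naming in resume_config),
    # and n_fft 1280 is a 4x-hop window used with center padding.
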
trainer:
  logger:
    class_path: pytorch_lightning.loggers.TensorBoardLogger
    init_args:
      save_dir: /home/xj_data/jishengpeng/Qwen_Codec/WavTokenizerDiT/result/train/dit_stage2_trainvocoder_noreduce_center_mel100_node3/
      # save_dir: /home/xj_data/jishengpeng/Qwen_Codec/WavTokenizer_fm_gtmel/result/train/wavtokenizer_smalldata_flowing_matching_frame75_3s_nq1_code4096_dim512_kmeans200_attn/
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
    - class_path: pytorch_lightning.callbacks.ModelSummary
      init_args:
        max_depth: 2
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        monitor: val_loss
        filename: wavtokenizer_checkpoint_{epoch}_{step}_{val_loss:.4f}
        save_top_k: 6
        save_last: true
    - class_path: decoder.helpers.GradNormCallback

  # Lightning counts max_steps across all optimizer steps (rather than the number of batches).
  # This amounts to 1M steps for the generator and 1M for the discriminator.
  max_steps: 1000000
  # You might want to limit val batches when evaluating all the metrics, as they are time-consuming.
  limit_val_batches: 100
  accelerator: gpu
  strategy: ddp
  # devices: [0]
  devices: [0,1,2,3,4,5,6,7]
  num_nodes: 3
  log_every_n_steps: 1000
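
For context, a minimal sketch of how a config in this shape is typically consumed. The upstream Vocos/WavTokenizer layout uses PyTorch Lightning's LightningCLI; the train.py entry point below is an assumption, since this commit only uploads the YAML file and does not include the launcher.

# train.py -- hedged sketch, not part of this commit; the actual entry point
# in this repository may differ.
from pytorch_lightning.cli import LightningCLI

if __name__ == "__main__":
    # No model or datamodule classes are passed, so LightningCLI runs in
    # subclass mode and instantiates whatever class_path/init_args the YAML
    # names (decoder.experiment.WavTokenizer, decoder.dataset.VocosDataModule).
    cli = LightningCLI(run=False)
    cli.trainer.fit(model=cli.model, datamodule=cli.datamodule)

With this pattern, each of the 3 nodes would launch something like `python train.py --config jsp_wavtokenizer_dit_stage2_vocoder_noreduce_center_mel100_node3.yaml`, and Lightning's DDP picks up devices and num_nodes from the trainer section above.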