nguyenvulebinh committed on
Commit
bbb9732
1 Parent(s): 8f926ab

add model voice-filter

Browse files
Files changed (3) hide show
  1. config.json +128 -0
  2. pytorch_model.bin +3 -0
  3. xvector_sincnet.pt +3 -0
config.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./model-bin/voice_enhancing",
3
+ "architectures": [
4
+ "ASRVoiceFilter"
5
+ ],
6
+ "audio_max_lengh": 20,
7
+ "blank_token_id": 30,
8
+ "ctc_zero_infinity": true,
9
+ "d_dec": 640,
10
+ "d_enc": 256,
11
+ "d_inner": 0,
12
+ "dec_drop": 0.2,
13
+ "do_asr": false,
14
+ "do_enh": true,
15
+ "dropout": 0.2,
16
+ "emb_drop": 0.2,
17
+ "enc_drop": 0.2,
18
+ "enh_args": {
19
+ "batch_size": 8,
20
+ "batch_type": "folded",
21
+ "best_model_criterion": [
22
+ [
23
+ "valid",
24
+ "si_snr",
25
+ "max"
26
+ ],
27
+ [
28
+ "valid",
29
+ "loss",
30
+ "min"
31
+ ]
32
+ ],
33
+ "criterions": [
34
+ {
35
+ "conf": {
36
+ "eps": 1e-07
37
+ },
38
+ "name": "si_snr",
39
+ "wrapper": "pit",
40
+ "wrapper_conf": {
41
+ "independent_perm": false,
42
+ "weight": 1.0
43
+ }
44
+ }
45
+ ],
46
+ "decoder": "stft",
47
+ "decoder_conf": {
48
+ "hop_length": 128,
49
+ "n_fft": 512
50
+ },
51
+ "encoder": "stft",
52
+ "encoder_conf": {
53
+ "hop_length": 128,
54
+ "n_fft": 512
55
+ },
56
+ "init": "xavier_uniform",
57
+ "keep_nbest_models": 1,
58
+ "max_epoch": 5,
59
+ "model_conf": {
60
+ "loss_type": "mask_mse",
61
+ "mask_type": "psm"
62
+ },
63
+ "num_workers": 4,
64
+ "optim": "adam",
65
+ "optim_conf": {
66
+ "eps": 1e-08,
67
+ "lr": 0.001,
68
+ "weight_decay": 1e-07
69
+ },
70
+ "patience": 10,
71
+ "scheduler": "reducelronplateau",
72
+ "scheduler_conf": {
73
+ "factor": 0.7,
74
+ "mode": "min",
75
+ "patience": 1
76
+ },
77
+ "separator": "conformer_voice_filter",
78
+ "separator_conf": {
79
+ "adim": 1024,
80
+ "aheads": 8,
81
+ "attention_dropout_rate": 0.1,
82
+ "concat_after": false,
83
+ "conformer_activation_type": "swish",
84
+ "conformer_enc_kernel_size": 5,
85
+ "conformer_pos_enc_layer_type": "rel_pos",
86
+ "conformer_self_attn_layer_type": "rel_selfattn",
87
+ "dropout_rate": 0.1,
88
+ "input_layer": "linear",
89
+ "layers": 4,
90
+ "linear_units": 896,
91
+ "nonlinear": "relu",
92
+ "normalize_before": false,
93
+ "num_spk": 1,
94
+ "positional_dropout_rate": 0.1,
95
+ "positionwise_conv_kernel_size": 1,
96
+ "positionwise_layer_type": "conv1d",
97
+ "use_cnn_in_conformer": true,
98
+ "use_macaron_style_in_conformer": true
99
+ },
100
+ "val_scheduler_criterion": [
101
+ "valid",
102
+ "loss"
103
+ ],
104
+ "xvector_emb_dim": 512
105
+ },
106
+ "enh_chunk_size": 5,
107
+ "freq_kn": 3,
108
+ "freq_std": 2,
109
+ "ignore_token_id": -1,
110
+ "label_smooth": 0.1,
111
+ "loss_reduction": "mean",
112
+ "model_type": "asr_voicefilter",
113
+ "n_classes_ctc": 33,
114
+ "n_classes_s2s": 5003,
115
+ "n_dec": 1,
116
+ "n_enc": 16,
117
+ "n_fft": 512,
118
+ "n_head": 4,
119
+ "n_kernel": 25,
120
+ "n_mels": 80,
121
+ "sample_rate": 16000,
122
+ "shared_emb": true,
123
+ "teacher_force": 0.7,
124
+ "time_ds": 1,
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.24.0",
127
+ "use_cnn": true
128
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abc7ebeb21a9d3cf7addcca0a40df72411195fc5f1a5d3b0c937ded9798d1baa
3
+ size 197745909
xvector_sincnet.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bcec986de13da7af7ac88736572692359950df63669989c4f78b294934c9089
3
+ size 96383626