File size: 6,461 Bytes
0d1350d
ed25c49
0d1350d
 
 
 
 
 
 
 
 
 
ed25c49
 
 
 
 
 
 
 
0d1350d
 
ed25c49
 
0d1350d
 
 
 
 
 
 
 
ed25c49
0d1350d
 
ed25c49
0d1350d
 
 
 
ed25c49
0d1350d
 
 
 
 
 
 
 
ed25c49
0d1350d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed25c49
0d1350d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed25c49
 
0d1350d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed25c49
 
0d1350d
 
 
 
 
 
 
ed25c49
 
0d1350d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# Generated 2023-09-08 from:
# /gpfsssd/scratch/rech/nou/uzn19yk/switched_data/semi_supervised_test_tunisian.yaml
# yamllint disable
# ################################
# Model: wav2vec2 + DNN + CTC
# Augmentation: SpecAugment
# Authors: Titouan Parcollet 2021
# ################################

# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1234
# Applies torch.manual_seed to the argument list [1234] (the same value as
# `seed`) when the file is loaded. The language-specific
# `!!python/object/apply` tag is rejected by safe YAML loaders, so only load
# this file from a trusted source.
__set_seed: !!python/object/apply:torch.manual_seed [1234]
# Experiment output locations, all rooted in the results directory of seed 1234.
# This generated file carries no `!ref` tags, so a `<seed>` placeholder would
# be kept as literal text instead of being substituted; the seed is therefore
# written out here, which keeps output_folder the parent of the three paths
# below.
output_folder: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234
wer_file: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/wer.txt
save_folder: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save
train_log: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/train_log.txt

# Local folder for the wav2vec2 checkpoint (not a URL). It is the same path as
# `save_path` in the `wav2vec2` block of this file.
wav2vec2_folder: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save/wav2vec2_checkpoint

# Data files
# Dataset root, e.g. /localscratch/cv-corpus-5.1-2020-06-22/fr
data_folder: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk
# Standard CommonVoice .tsv files, one per split (train / dev / test)
train_tsv_file: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk/train.tsv
dev_tsv_file: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk/dev.tsv
test_tsv_file: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk/test.tsv
accented_letters: true
# Language code: use 'it' for Italian, 'rw' for Kinyarwanda, 'en' for English
language: fr
# CSV files for the train and valid sets
train_csv: /gpfsscratch/rech/nou/uzn19yk/tunisian_csvs/good_final/train.csv
valid_csv: /gpfsscratch/rech/nou/uzn19yk/tunisian_csvs/good_final/dev.csv
# test_csv is a list of CSV files; it currently holds a single entry
test_csv:
- /gpfsscratch/rech/nou/uzn19yk/tunisian_semi/unlabeled.csv

skip_prep: true # Skip data preparation

# Language-model settings: a flag plus the path to an n-gram ARPA file.
# The path is relative (no leading slash).
use_language_modelling: true
ngram_lm_path: arpas/indomain.arpa

# We remove utterances longer than 10s in the train/dev/test sets as
# longer sentences certainly correspond to "open microphones".
# NOTE(review): utterances shorter than 1.2s are presumably filtered out as
# well (avoid_if_shorter_than) - confirm against the data pipeline.
avoid_if_longer_than: 10.0
avoid_if_shorter_than: 1.2


# Training parameters
# number_of_epochs repeats as `limit` in epoch_counter; lr and lr_wav2vec
# repeat in the optimizer and scheduler blocks further down in this file.
number_of_epochs: 14
lr: 1.0
lr_wav2vec: 0.0001
sorting: ascending
auto_mix_prec: false
sample_rate: 16000
ckpt_interval_minutes: 30 # save checkpoint every N min

# With data_parallel batch_size is split into N jobs
# With DDP batch_size is multiplied by N jobs
# Must be 6 per GPU to fit 16GB of VRAM
# NOTE(review): the value configured below is 10, not 6 - confirm which one
# is intended for the target GPU.
batch_size: 10
test_batch_size: 4

# Data-loader options for training and testing; their batch sizes repeat
# batch_size and test_batch_size above.
dataloader_options:
  batch_size: 10
  num_workers: 6
test_dataloader_options:
  batch_size: 4
  num_workers: 6

# BPE parameters
# NOTE(review): token_type is "char" here, so the BPE wording in this section
# presumably comes from the original template - confirm.
token_type: char  # ["unigram", "bpe", "char"]
character_coverage: 1.0

# Model parameters
# The values 1024 and 0.15 are repeated literally inside the `enc` block.
# activation: !name:torch.nn.LeakyReLU
wav2vec_output_dim: 1024
dnn_neurons: 1024
freeze_wav2vec: false
freeze_feature_extractor: true
dropout: 0.15
warmup_steps: 500 # The wav2vec 2 model isn't updated for this amount of steps

# Outputs
# The same value is used as n_neurons of ctc_lin.
output_neurons: 40  # BPE size, index(blank/eos/bos) = 0

# Decoding parameters
# Be sure that the bos and eos index match with the BPEs ones
# blank_index is also passed to ctc_cost.
blank_index: 0
unk_index: 1

#
# Functions and classes
#
# Epoch counter with a limit of 14 (same value as number_of_epochs).
# Anchored as id007 so that the checkpointer can list it as a recoverable.
epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 14

# Time-domain augmentation object, configured for 16000 Hz audio (same value
# as sample_rate).
# NOTE(review): `speeds` are presumably speed-perturbation factors in percent
# (95 %, 100 %, 105 %) - confirm against the SpeechBrain documentation.
augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [95, 100, 105]

# Encoder DNN: three Linear(1024) -> BatchNorm1d -> LeakyReLU stages, with
# Dropout(p=0.15) after the first two stages only.
# Anchored as id002 and aliased from `modules` and `model`.
enc: &id002 !new:speechbrain.nnet.containers.Sequential
  # The last dimension (1024) equals wav2vec_output_dim; the other two
  # dimensions are left unspecified (null).
  input_shape: [null, null, 1024]
  # Stage 1: linear + batch norm + activation + dropout
  linear1: !name:speechbrain.nnet.linear.Linear
    n_neurons: 1024
    bias: true
  bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
  activation: !new:torch.nn.LeakyReLU
  drop: !new:torch.nn.Dropout
    p: 0.15
  # Stage 2: same layout as stage 1
  linear2: !name:speechbrain.nnet.linear.Linear
    n_neurons: 1024
    bias: true
  bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
  activation2: !new:torch.nn.LeakyReLU
  drop2: !new:torch.nn.Dropout
    p: 0.15
  # Stage 3: linear + batch norm + activation, no dropout
  linear3: !name:speechbrain.nnet.linear.Linear
    n_neurons: 1024
    bias: true
  bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
  activation3: !new:torch.nn.LeakyReLU

# Speech encoder created through the HuggingFace wav2vec2 wrapper. The model
# itself is not frozen (freeze: false) but its feature extractor is
# (freeze_feature_extractor: true). Anchored as id001.
# NOTE(review): `source` points at a folder named "wavlm" although the wrapper
# class is HuggingFaceWav2Vec2 - confirm the checkpoint type.
wav2vec2: &id001 !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
  source: /gpfsstore/rech/nou/uzn19yk/wavlm/
  output_norm: false
  freeze: false
  freeze_feature_extractor: true
  # Same path as wav2vec2_folder.
  save_path: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save/wav2vec2_checkpoint

#####
# Uncomment this block if you prefer to use a Fairseq pretrained model instead
# of a HuggingFace one. Here, we provide a URL taken from the Fairseq GitHub
# repository for the multilingual XLSR model.
#
#wav2vec2_url: https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr_53_56k.pt
#wav2vec2: !new:speechbrain.lobes.models.fairseq_wav2vec.FairseqWav2Vec2
#    pretrained_path: !ref <wav2vec2_url>
#    output_norm: True
#    freeze: False
#    save_path: !ref <save_folder>/wav2vec2_checkpoint/model.pt
#####


# Output linear layer: input size 1024 (the encoder width), 40 output units
# (same value as output_neurons). Anchored as id003.
ctc_lin: &id003 !new:speechbrain.nnet.linear.Linear
  input_size: 1024
  n_neurons: 40

# Softmax activation created with apply_log: true.
# NOTE(review): presumably this makes it return log-probabilities - confirm.
log_softmax: !new:speechbrain.nnet.activations.Softmax
  apply_log: true

# Reference to the ctc_loss function with blank_index preset to 0 (same value
# as blank_index in the decoding parameters).
ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
  blank_index: 0

# Named modules; each entry is an alias to an object anchored earlier in this
# file (id001 = wav2vec2, id002 = enc, id003 = ctc_lin).
modules:
  wav2vec2: *id001
  enc: *id002
  ctc_lin: *id003
# ModuleList built from enc and ctc_lin only (wav2vec2 is not part of it).
# Anchored as id004 so that the checkpointer can list it as `model`.
model: &id004 !new:torch.nn.ModuleList
- [*id002, *id003]
# Adadelta optimizer class with preset arguments; lr equals `lr` (1.0).
# NOTE(review): presumably applied to the parameters of `model` - confirm in
# the training script.
model_opt_class: !name:torch.optim.Adadelta
  lr: 1.0
  rho: 0.95
  eps: 1.e-8

# Adam optimizer class with preset arguments; lr equals `lr_wav2vec` (0.0001).
# NOTE(review): presumably applied to the wav2vec2 parameters - confirm in the
# training script.
wav2vec_opt_class: !name:torch.optim.Adam
  lr: 0.0001

# NewBob scheduler starting at 1.0 (same value as `lr`), annealing factor 0.8.
# Anchored as id005 for the checkpointer.
lr_annealing_model: &id005 !new:speechbrain.nnet.schedulers.NewBobScheduler
  initial_value: 1.0
  improvement_threshold: 0.0025
  annealing_factor: 0.8
  patient: 0

# NewBob scheduler starting at 0.0001 (same value as `lr_wav2vec`), annealing
# factor 0.9. Anchored as id006 for the checkpointer.
lr_annealing_wav2vec: &id006 !new:speechbrain.nnet.schedulers.NewBobScheduler
  initial_value: 0.0001
  improvement_threshold: 0.0025
  annealing_factor: 0.9
  patient: 0

# Checkpointer writing into the save folder (same path as save_folder). Its
# recoverables are aliases to the wav2vec2 model, the enc + ctc_lin ModuleList,
# both LR schedulers and the epoch counter.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save
  recoverables:
    wav2vec2: *id001
    model: *id004
    scheduler_model: *id005
    scheduler_wav2vec: *id006
    counter: *id007
# File-based training logger; save_file is the same path as train_log.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/train_log.txt

# Both metrics refer to ErrorRateStats; error_rate_computer presets no
# arguments.
error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

# Same class with split_tokens: true.
# NOTE(review): presumably this scores at character level (CER) - confirm.
cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
  split_tokens: true