# Generated 2023-09-08 from:
# /gpfsssd/scratch/rech/nou/uzn19yk/switched_data/semi_supervised_test_tunisian.yaml
# yamllint disable
# ################################
# Model: wav2vec2 + DNN + CTC
# Augmentation: SpecAugment
# Authors: Titouan Parcollet 2021
# ################################
# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1234
__set_seed: !!python/object/apply:torch.manual_seed [1234]
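# `!!python/object/apply:torch.manual_seed [1234]` invokes torch.manual_seed(1234)
# at yaml-loading time. In the unresolved recipe this line is usually written
# with a reference so it stays in sync with `seed`; a HyperPyYAML sketch:
# __set_seed: !apply:torch.manual_seed [!ref <seed>]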
output_folder:
  /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234
wer_file:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/wer.txt
save_folder:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save
train_log:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/train_log.txt
# Folder where the pretrained wav2vec 2.0 checkpoint is stored (the CommonVoice
# recipe this derives from pointed here at the largest LeBenchmark French wav2vec2).
wav2vec2_folder:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save/wav2vec2_checkpoint
# Data files
data_folder: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk # e.g., /localscratch/cv-corpus-5.1-2020-06-22/fr
train_tsv_file: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk/train.tsv # Standard CommonVoice .tsv files
dev_tsv_file: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk/dev.tsv # Standard CommonVoice .tsv files
test_tsv_file: /gpfsscratch/rech/nou/uzn19yk/tunisian_junk/test.tsv # Standard CommonVoice .tsv files
accented_letters: true
language: fr # use 'it' for Italian, 'rw' for Kinyarwanda, 'en' for English
train_csv: /gpfsscratch/rech/nou/uzn19yk/tunisian_csvs/good_final/train.csv
valid_csv: /gpfsscratch/rech/nou/uzn19yk/tunisian_csvs/good_final/dev.csv
test_csv:
- /gpfsscratch/rech/nou/uzn19yk/tunisian_semi/unlabeled.csv
skip_prep: true # Skip data preparation
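# The prepared csv files are assumed to follow the SpeechBrain CommonVoice
# layout; a minimal sketch of the expected header plus one hypothetical row:
# ID,duration,wav,spk_id,wrd
# utt_0001,3.42,/path/to/clips/utt_0001.wav,spk_01,some transcription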
use_language_modelling: true
ngram_lm_path: arpas/indomain.arpa
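# The ARPA model above is consumed by the training script at decoding time
# (use_language_modelling gates it); note the path is relative, so it is
# resolved against the directory the experiment is launched from.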
# We remove utterances longer than 10 s from the train/dev/test sets, as
# longer sentences likely correspond to "open microphones".
avoid_if_longer_than: 10.0
avoid_if_shorter_than: 1.2
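# At sample_rate 16000 these bounds translate to 10.0 * 16000 = 160,000 and
# 1.2 * 16000 = 19,200 samples per utterance.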
# Training parameters
number_of_epochs: 14
lr: 1.0
lr_wav2vec: 0.0001
sorting: ascending
auto_mix_prec: false
sample_rate: 16000
ckpt_interval_minutes: 30 # save checkpoint every N min
# With data_parallel, batch_size is split across N jobs.
# With DDP, batch_size is multiplied by N jobs.
# 6 per GPU fits in 16GB of VRAM; the batch_size of 10 used here assumes a
# larger-memory GPU.
batch_size: 10
test_batch_size: 4
dataloader_options:
batch_size: 10
num_workers: 6
test_dataloader_options:
batch_size: 4
num_workers: 6
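# Example: with DDP over 4 processes the effective batch size is 4 * 10 = 40;
# with data_parallel the batch of 10 is instead split across the visible GPUs.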
# BPE parameters
token_type: char # ["unigram", "bpe", "char"]
character_coverage: 1.0
# Model parameters
# activation: !name:torch.nn.LeakyReLU
wav2vec_output_dim: 1024
dnn_neurons: 1024
freeze_wav2vec: false
freeze_feature_extractor: true
dropout: 0.15
warmup_steps: 500 # The wav2vec 2.0 model is not updated for this number of steps
# Outputs
output_neurons: 40 # Vocabulary size (char tokens here); index(blank/eos/bos) = 0
# Decoding parameters
# Be sure that the bos and eos indexes match the BPE ones
blank_index: 0
unk_index: 1
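# With token_type char, output_neurons (40 above) is expected to equal the
# character vocabulary size, counting the CTC blank at index 0 and unk at 1.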
#
# Functions and classes
#
epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
limit: 14
augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
sample_rate: 16000
speeds: [95, 100, 105]
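# The speeds above are resampling percentages: 95/100/105 yields a +-5% speed
# perturbation around the original rate.
# The encoder below stacks three 1024-unit Linear layers, each followed by
# BatchNorm1d and LeakyReLU, with 0.15 dropout after the first two blocks;
# `!name:` layers are instantiated lazily by Sequential, which infers their
# input sizes from input_shape.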
enc: &id002 !new:speechbrain.nnet.containers.Sequential
input_shape: [null, null, 1024]
linear1: !name:speechbrain.nnet.linear.Linear
n_neurons: 1024
bias: true
bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
activation: !new:torch.nn.LeakyReLU
drop: !new:torch.nn.Dropout
p: 0.15
linear2: !name:speechbrain.nnet.linear.Linear
n_neurons: 1024
bias: true
bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
activation2: !new:torch.nn.LeakyReLU
drop2: !new:torch.nn.Dropout
p: 0.15
linear3: !name:speechbrain.nnet.linear.Linear
n_neurons: 1024
bias: true
bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
activation3: !new:torch.nn.LeakyReLU
wav2vec2: &id001 !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
source: /gpfsstore/rech/nou/uzn19yk/wavlm/
output_norm: false
freeze: false
freeze_feature_extractor: true
save_path:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save/wav2vec2_checkpoint
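#####
# `source` above points at a local checkpoint folder (named wavlm) rather
# than a hub id; any HuggingFace wav2vec2-style checkpoint should work. A
# hub-based variant would look like the sketch below (the model id is an
# assumption, using the LeBenchmark French model mentioned above):
#
#wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
#   source: LeBenchmark/wav2vec2-FR-7K-large
#   output_norm: False
#   freeze: False
#   freeze_feature_extractor: True
#   save_path: !ref <save_folder>/wav2vec2_checkpoint
#####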
#####
# Uncomment this block if you prefer to use a Fairseq pretrained model instead
# of a HuggingFace one. Here, we provide a URL that is obtained from the
# Fairseq github for the multilingual XLSR.
#
#wav2vec2_url: https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr_53_56k.pt
#wav2vec2: !new:speechbrain.lobes.models.fairseq_wav2vec.FairseqWav2Vec2
# pretrained_path: !ref <wav2vec2_url>
# output_norm: True
# freeze: False
# save_path: !ref <save_folder>/wav2vec2_checkpoint/model.pt
#####
ctc_lin: &id003 !new:speechbrain.nnet.linear.Linear
input_size: 1024
n_neurons: 40
log_softmax: !new:speechbrain.nnet.activations.Softmax
apply_log: true
ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
blank_index: 0
modules:
wav2vec2: *id001
enc: *id002
ctc_lin: *id003
model: &id004 !new:torch.nn.ModuleList
- [*id002, *id003]
model_opt_class: !name:torch.optim.Adadelta
lr: 1.0
rho: 0.95
eps: 1.e-8
wav2vec_opt_class: !name:torch.optim.Adam
lr: 0.0001
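# Two optimizers: Adadelta (lr 1.0) trains the enc + ctc_lin head grouped in
# `model`, while Adam (lr 1e-4) gently fine-tunes the pretrained wav2vec2.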
lr_annealing_model: &id005 !new:speechbrain.nnet.schedulers.NewBobScheduler
initial_value: 1.0
improvement_threshold: 0.0025
annealing_factor: 0.8
patient: 0
lr_annealing_wav2vec: &id006 !new:speechbrain.nnet.schedulers.NewBobScheduler
initial_value: 0.0001
improvement_threshold: 0.0025
annealing_factor: 0.9
patient: 0
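# NewBob anneals the lr whenever the relative improvement of the validation
# metric drops below improvement_threshold (0.25% here). Worked example: the
# model lr would go 1.0 -> 1.0 * 0.8 = 0.8, and the wav2vec lr
# 0.0001 -> 0.0001 * 0.9 = 9e-05.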
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
checkpoints_dir:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/save
recoverables:
wav2vec2: *id001
model: *id004
scheduler_model: *id005
scheduler_wav2vec: *id006
counter: *id007
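# Everything under recoverables is saved every ckpt_interval_minutes (30 min
# above), so training can resume from the latest checkpoint after a crash.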
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
save_file:
/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/results/14epoch_tunisian/1234/train_log.txt
error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
split_tokens: true
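# split_tokens: true makes this ErrorRateStats instance split words into
# characters, so it reports CER while error_rate_computer reports WER.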