# ############################################################################
# SpeechBrain hyperparameters: wav2vec2 encoder + attentional seq2seq decoder
# (inference/pretrained-model config for Macedonian ASR)
# ############################################################################
# Hparams NEEDED
HPARAMS_NEEDED: ["wav2vec_output_dim", "emb_size", "dec_neurons", "dec_layers", "output_neurons", "log_softmax", "tokenizer"]
# Modules Needed
MODULES_NEEDED: ["encoder_w2v2", "embedding", "ctc_lin", "seq_lin"]
# Pretrain folder (HuggingFace)
# NOTE: the original declared output_folder with "!ref" but no <...>
# placeholder; such a reference resolves to the literal string, so a plain
# scalar is equivalent and clearer.
output_folder: output_folder_seq2seq_cv_podcast_arhiv_augmentation
# HuggingFace repo holding the pretrained checkpoint loaded by the pretrainer
pretrained_path: Macedonian-ASR/wav2vec2-aed-macedonian-asr
# HuggingFace hub id of the wav2vec2 model used as the speech encoder
wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
save_folder: !ref <output_folder>/save
wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
####################### Training Parameters ####################################
# NOTE(review): no training parameters are defined here — this file appears to
# be an inference config; confirm against the training recipe if retraining.
####################### Model Parameters #######################################
dropout: 0.15
# Feature dimensionality coming out of the wav2vec2 encoder; must match the
# hidden size of <wav2vec2_hub>.
wav2vec_output_dim: 1024
emb_size: 128
dec_neurons: 1024
dec_layers: 1
# Vocabulary size; must match the SentencePiece model (1000_unigram.model).
output_neurons: 1000
# Special-token indices (all mapped to id 0 in this tokenizer).
blank_index: 0
bos_index: 0
eos_index: 0
unk_index: 0
# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 1.0
valid_beam_size: 10
test_beam_size: 20
using_eos_threshold: True
eos_threshold: 1.5
using_max_attn_shift: True
max_attn_shift: 700
length_normalization: True
temperature: 1.0
# Scoring parameters
coverage_penalty: 1.5
# Wav2vec2 encoder.
# Fix: the constructor arguments below had lost their indentation, turning
# them into top-level keys instead of kwargs of the Wav2Vec2 object.
encoder_w2v2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
    source: !ref <wav2vec2_hub>
    output_norm: True
    freeze: False                    # transformer layers stay trainable
    freeze_feature_extractor: True   # CNN feature extractor stays frozen
    save_path: !ref <wav2vec2_folder>
    output_all_hiddens: False
# Token embedding feeding the attentional decoder (indentation restored).
embedding: !new:speechbrain.nnet.embedding.Embedding
    num_embeddings: !ref <output_neurons>
    embedding_dim: !ref <emb_size>
# Attention-based RNN decoder (indentation of kwargs restored).
decoder: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
    enc_dim: !ref <wav2vec_output_dim>
    input_size: !ref <emb_size>
    rnn_type: gru
    attn_type: location
    hidden_size: !ref <dec_neurons>
    attn_dim: 512
    num_layers: !ref <dec_layers>
    scaling: 1.0
    channels: 10
    kernel_size: 100
    re_init: True
    dropout: !ref <dropout>
# Linear projection of encoder states for the CTC head (indentation restored).
ctc_lin: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <wav2vec_output_dim>
    n_neurons: !ref <output_neurons>
# Linear projection of decoder states for the seq2seq head (indentation restored).
seq_lin: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <dec_neurons>
    n_neurons: !ref <output_neurons>
# Log-softmax applied on top of the linear heads (indentation restored).
log_softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True

# SentencePiece tokenizer.
# NOTE(review): relative model_file path — presumably resolved against the
# working directory or pretrain folder; confirm where 1000_unigram.model lives.
tokenizer: !new:sentencepiece.SentencePieceProcessor
    model_file: 1000_unigram.model
# Modules dict exposed to the Brain/inference class (indentation restored).
modules:
    encoder_w2v2: !ref <encoder_w2v2>
    embedding: !ref <embedding>
    decoder: !ref <decoder>
    ctc_lin: !ref <ctc_lin>
    seq_lin: !ref <seq_lin>

# Single container gathering every trainable component so the pretrainer can
# load one checkpoint into all of them at once.
model: !new:torch.nn.ModuleList
    - [!ref <encoder_w2v2>, !ref <embedding>, !ref <decoder>, !ref <ctc_lin>, !ref <seq_lin>]
############################## Decoding & optimiser ############################
# Coverage scorer used during beam search (indentation restored).
coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
    vocab_size: !ref <output_neurons>

scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
    full_scorers: [!ref <coverage_scorer>]
    weights:
        coverage: !ref <coverage_penalty>
# Beam-search decoder used at test time (indentation of kwargs restored).
test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
    embedding: !ref <embedding>
    decoder: !ref <decoder>
    linear: !ref <seq_lin>
    bos_index: !ref <bos_index>
    eos_index: !ref <eos_index>
    min_decode_ratio: !ref <min_decode_ratio>
    max_decode_ratio: !ref <max_decode_ratio>
    beam_size: !ref <test_beam_size>
    eos_threshold: !ref <eos_threshold>
    using_max_attn_shift: !ref <using_max_attn_shift>
    max_attn_shift: !ref <max_attn_shift>
    temperature: !ref <temperature>
    scorer: !ref <scorer>
############################## Logging and Pretrainer ##########################
# Loads the pretrained checkpoint from <pretrained_path> into <model>
# (nested loadables/paths maps had lost their indentation; restored).
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        model: !ref <model>
    paths:
        model: !ref <pretrained_path>/model.ckpt