# ################################
# Model: Whisper (Encoder-Decoder) + NLL
# Augmentation: TimeDomainSpecAugment
# Authors: Pooneh Mousavi 2022
# ################################
# Hugging Face Hub identifier of the pretrained Whisper checkpoint to load
# (OpenAI's large-v2 model). Passed as `source` to the Whisper wrapper below.
whisper_hub: openai/whisper-large-v2
# Normalize transcripts with the same text normalization used in the
# Whisper paper. Refer to its Appendix C for further information.
normalized_transcripts: True
# NOTE(review): `language`, `auto_mix_prec` and `sample_rate` are not
# referenced elsewhere in the visible part of this file; presumably they are
# read by the code that loads these hyperparameters -- confirm.
language: mongolian
auto_mix_prec: False
sample_rate: 16000
# These values are only used by the searchers.
# They need to be hardcoded and should not be changed with Whisper.
# They are used as part of the search process.
# The bos token of the searcher will be timestamp_index
# and will be concatenated with the bos, language and task tokens.
timestamp_index: 50363
eos_index: 50257
bos_index: 50258
# Decoding parameters
# min/max_decode_ratio are forwarded to the decoder defined below.
min_decode_ratio: 0.0
max_decode_ratio: 0.1
# In the visible part of this file, test_beam_size is referenced only by the
# commented-out test_beam_searcher.
test_beam_size: 8
# Model parameters
# Forwarded to the Whisper wrapper below as `freeze` and `freeze_encoder`.
freeze_whisper: True
freeze_encoder: True
# Whisper model wrapper, built from the checkpoint named by <whisper_hub>.
# The keys below are constructor arguments and must be nested under the
# `!new:` tag; at column 0 they would become unrelated top-level keys.
whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
    source: !ref <whisper_hub>
    freeze: !ref <freeze_whisper>
    freeze_encoder: !ref <freeze_encoder>
    save_path: whisper_checkpoints
    encoder_only: False
# Greedy-search decoder operating on the Whisper model defined above.
# Note that the searcher's bos_index is wired to <timestamp_index>, not to
# the top-level <bos_index> (see the comment on the token indices).
decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
    model: !ref <whisper>
    bos_index: !ref <timestamp_index>
    eos_index: !ref <eos_index>
    min_decode_ratio: !ref <min_decode_ratio>
    max_decode_ratio: !ref <max_decode_ratio>
# test_beam_searcher: !new:speechbrain.decoders.seq2seq.S2SWhisperBeamSearch
#     module: [!ref <whisper>]
#     bos_index: !ref <timestamp_index>
#     eos_index: !ref <eos_index>
#     min_decode_ratio: !ref <min_decode_ratio>
#     max_decode_ratio: !ref <max_decode_ratio>
#     beam_size: !ref <test_beam_size>
# Mapping of module names to the objects instantiated above.
modules:
    whisper: !ref <whisper>
    decoder: !ref <decoder>
# Pretrainer whose `loadables` mapping registers the Whisper model under the
# name `whisper`.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>