#!/usr/bin/env bash
# SpeechT5 speaker-identification (s2c) finetuning launcher.
#
# Expected layout (adjust for your environment):
#   DATA_ROOT          - fairseq manifest dir containing the train/valid subsets
#   SAVE_DIR           - output dir for checkpoints, tensorboard and train.log
#   PT_CHECKPOINT_PATH - pretrained speecht5_base checkpoint to finetune from

DATA_ROOT=/home/wangrui/projects/SpeechT5/manifest
SAVE_DIR=/home/wangrui/projects/SpeechT5/experimental/s2c
TRAIN_SET=train
VALID_SET=valid

# fairseq --user-dir that registers the speecht5 task, model and criterion.
USER_DIR=/home/wangrui/projects/SpeechT5/SpeechT5/fairseq/examples/speecht5
PT_CHECKPOINT_PATH=/nfs-data/user1/PhDHub/ckpt/speecht5_base.pt

# Quoted so the script survives a SAVE_DIR containing spaces.
mkdir -p "${SAVE_DIR}"
# Launch SID (s2c) finetuning from the pretrained SpeechT5 checkpoint.
# Flags are grouped (distributed / task / criterion / optimizer / schedule /
# model) with bare `\` lines as separators; stdout is appended to
# ${SAVE_DIR}/train.log. The stray ` |` after each `\` in the original
# broke every line continuation and has been removed.
fairseq-train "${DATA_ROOT}" \
  --save-dir "${SAVE_DIR}" \
  --tensorboard-logdir "${SAVE_DIR}" \
  --train-subset "${TRAIN_SET}" \
  --valid-subset "${VALID_SET}" \
  --user-dir "${USER_DIR}" \
  --distributed-world-size 4 \
  --distributed-port 0 \
  --ddp-backend legacy_ddp \
  --log-format json \
  --seed 1 \
  --fp16 \
  \
  --task speecht5 \
  --t5-task s2c \
  --sample-rate 16000 \
  --num-workers 4 \
  --batch-size 8 \
  --update-freq 2 \
  --data-buffer-size 0 \
  \
  --criterion speecht5 \
  --report-accuracy \
  --best-checkpoint-metric "s2c_accuracy" \
  --maximize-best-checkpoint-metric \
  \
  --optimizer adam \
  --dropout 0.1 \
  --activation-dropout 0.1 \
  --attention-dropout 0.1 \
  --encoder-layerdrop 0.05 \
  --lr-scheduler triangular \
  --max-lr 2e-4 \
  --lr-period-updates 60000 \
  --lr-shrink 0.5 \
  --lr 1e-8 \
  --feature-grad-mult 1.0 \
  --weight-decay 0.1 \
  \
  --max-update 60000 \
  --max-text-positions 600 \
  --max-speech-positions 8000 \
  --required-batch-size-multiple 1 \
  --skip-invalid-size-inputs-valid-test \
  --save-interval-updates 10000 \
  --validate-after-updates 20000 \
  --no-epoch-checkpoints \
  --log-interval 10 \
  \
  --arch t5_transformer_base_asr \
  --share-input-output-embed \
  --find-unused-parameters \
  --bert-init \
  --relative-position-embedding \
  --mask-prob 0.0 \
  --mask-channel-prob 0.0 \
  --sid-no-pooling-bn \
  --sid-no-embed-postnet \
  \
  --finetune-from-model "${PT_CHECKPOINT_PATH}" >> "${SAVE_DIR}/train.log"
# Report completion; the original trailing `|` piped echo into a dangling
# (empty) pipeline, which is a syntax error — it has been removed.
echo "SID finetuning finished"