diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log" @@ -0,0 +1,5571 @@ +# Running on gpua003.delta.ncsa.illinois.edu +# Started at Wed Jul 5 22:37:23 CDT 2023 +# SLURMD_NODENAME=gpua003 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2132611 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2132611 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA100x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=350544 +# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua003 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_409154d5-fd37-4757-b90c-3838c14071d0 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file 
+[gpua003:0/64] 2023-07-05 22:40:37,448 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpua003:0/64] 2023-07-05 22:40:38,431 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpua003:0/64] 2023-07-05 22:40:38,458 (s2t:483) INFO: Vocabulary size: 50002
+[gpua003:0/64] 2023-07-05 22:40:52,612 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
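[Note on the two entries above: the parameter group reports lr=2.5e-08 even though initial_lr is 2.5e-4; that is the warmup schedule evaluated at its first step, not a misconfiguration. A minimal sketch, assuming the Noam-style rule that espnet2's WarmupLR is understood to apply:]

# Sketch (not from the log); assumes espnet2's WarmupLR rule:
#   lr = base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(1))      # 2.5e-08, the value shown in Parameter Group 0 above
print(warmup_lr(10000))  # 2.5e-04, peak lr reached at warmup_steps
print(warmup_lr(40000))  # 1.25e-04, decays as step**-0.5 after warmup

# The model summary is also self-consistent: 888.51 M float32 parameters
# at 4 bytes each is about 3.55 GB.
print(888.51e6 * 4 / 1e9)  # ~3.55

[Under this rule the lr peaks at the configured 2.5e-4 exactly at warmup_steps=10000 and then decays as step**-0.5; since this run resumes from epoch 14, the live lr during training differs from the step-1 value printed here.]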
+[gpua003:0/64] 2023-07-05 22:40:52,630 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpua003:0/64] 2023-07-05 22:40:53,329 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpua003:0/64] 2023-07-05 22:41:01,373 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:01,582 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpua003:0/64] 2023-07-05 22:41:02,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:41:02,408 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpua003:0/64] 2023-07-05 22:41:33,411 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpua003:350633:350633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350633:350633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350633:350633 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpua003:0/64] 2023-07-05 22:41:38,247 (trainer:284) INFO: 14/100epoch started
+[gpua003:0/64] 2023-07-05 22:41:38,292 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-05 22:41:57,218 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:42:00,682 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpua003:0/64] 2023-07-05 22:42:00,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-05 22:42:00,689 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpua031:1680700:1680700 [0] NCCL INFO cudaDriverVersion 12010
+gpua031:1680700:1680700 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680700:1680700 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680700:1680773 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680700:1680773 [0] NCCL INFO Using network IB
+gpua031:1680700:1680773 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua031:1680700:1680773 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Connected all rings
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Connected all trees
+gpua031:1680700:1680773 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680700:1680773 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680700:1680773 [0] NCCL INFO comm 0xb9862e50 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
[... analogous NCCL init blocks for the other ranks (per-rank tree/ring topology, P2P/IPC and NET/IB channels, comm Init COMPLETE) omitted ...]
+gpua087:2330954:2330954 [1] NCCL INFO cudaDriverVersion 12010
+gpua087:2330954:2330954 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330954:2330954 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330954:2331029 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330954:2331029 [1] NCCL INFO Using network IB
+gpua087:2330954:2331029 [1] NCCL INFO Setting
affinity for GPU 1 to ffff,00000000 +gpua087:2330954:2331029 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Connected all rings +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua087:2330954:2331029 [1] NCCL INFO Connected all trees +gpua087:2330954:2331029 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua087:2330954:2331029 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua087:2330954:2331029 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua028:3269324:3269324 [3] NCCL INFO cudaDriverVersion 12010 +gpua028:3269324:3269324 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269324:3269324 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269324:3269401 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269324:3269401 [3] NCCL INFO Using network IB +gpua028:3269324:3269401 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua028:3269324:3269401 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua028:3269324:3269401 [3] NCCL INFO Connected all rings +gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua028:3269324:3269401 [3] NCCL INFO Connected all trees +gpua028:3269324:3269401 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269324:3269401 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269324:3269401 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua053:959076:959076 [2] NCCL INFO cudaDriverVersion 12010 +gpua053:959076:959076 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959076:959076 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959076:959150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959076:959150 [2] NCCL INFO Using network IB +gpua053:959076:959150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua053:959076:959150 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Connected all rings +gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua053:959076:959150 [2] NCCL INFO Connected all trees +gpua053:959076:959150 [2] NCCL INFO 
threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959076:959150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959076:959150 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua028:3269322:3269322 [1] NCCL INFO cudaDriverVersion 12010 +gpua028:3269322:3269322 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269322:3269322 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269322:3269404 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269322:3269404 [1] NCCL INFO Using network IB +gpua028:3269322:3269404 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua028:3269322:3269404 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Connected all rings +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua028:3269322:3269404 [1] NCCL INFO Connected all trees +gpua028:3269322:3269404 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269322:3269404 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269322:3269404 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua053:959074:959074 [0] NCCL INFO cudaDriverVersion 12010 +gpua053:959074:959074 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959074:959074 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959074:959149 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959074:959149 [0] NCCL INFO Using network IB +gpua053:959074:959149 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua053:959074:959149 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua053:959074:959149 [0] NCCL INFO Connected all rings +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpua053:959074:959149 [0] NCCL INFO Connected all trees +gpua053:959074:959149 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959074:959149 [0] 
NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959074:959149 [0] NCCL INFO comm 0x50589df0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua028:3269321:3269321 [0] NCCL INFO cudaDriverVersion 12010 +gpua028:3269321:3269321 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269321:3269321 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269321:3269403 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269321:3269403 [0] NCCL INFO Using network IB +gpua028:3269321:3269403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua028:3269321:3269403 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua028:3269321:3269403 [0] NCCL INFO Connected all rings +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpua028:3269321:3269403 [0] NCCL INFO Connected all trees +gpua028:3269321:3269403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269321:3269403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269321:3269403 [0] NCCL INFO comm 0xc37df860 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua028:3269323:3269323 [2] NCCL INFO cudaDriverVersion 12010 +gpua028:3269323:3269323 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0> +gpua028:3269323:3269323 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua028:3269323:3269402 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0> +gpua028:3269323:3269402 [2] NCCL INFO Using network IB +gpua028:3269323:3269402 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua028:3269323:3269402 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Connected all rings +gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua028:3269323:3269402 [2] NCCL INFO Connected all trees +gpua028:3269323:3269402 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua028:3269323:3269402 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua028:3269323:3269402 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua053:959077:959077 [3] NCCL INFO 
cudaDriverVersion 12010 +gpua053:959077:959077 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959077:959077 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959077:959151 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959077:959151 [3] NCCL INFO Using network IB +gpua053:959077:959151 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua053:959077:959151 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua053:959077:959151 [3] NCCL INFO Connected all rings +gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua053:959077:959151 [3] NCCL INFO Connected all trees +gpua053:959077:959151 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959077:959151 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959077:959151 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua053:959075:959075 [1] NCCL INFO cudaDriverVersion 12010 +gpua053:959075:959075 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:959075:959075 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:959075:959152 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:959075:959152 [1] NCCL INFO Using network IB +gpua053:959075:959152 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua053:959075:959152 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Connected all rings +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read +gpua053:959075:959152 [1] NCCL INFO Connected all trees +gpua053:959075:959152 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:959075:959152 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:959075:959152 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua055:3866106:3866106 [3] NCCL INFO cudaDriverVersion 12010 +gpua055:3866106:3866106 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866106:3866106 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866106:3866180 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866106:3866180 [3] NCCL INFO Using network IB +gpua055:3866106:3866180 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua055:3866106:3866180 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 
39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua055:3866106:3866180 [3] NCCL INFO Connected all rings +gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua055:3866106:3866180 [3] NCCL INFO Connected all trees +gpua055:3866106:3866180 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866106:3866180 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866106:3866180 [3] NCCL INFO comm 0xb731bb50 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua055:3866104:3866104 [1] NCCL INFO cudaDriverVersion 12010 +gpua055:3866104:3866104 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866104:3866104 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866104:3866182 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866104:3866182 [1] NCCL INFO Using network IB +gpua055:3866104:3866182 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua055:3866104:3866182 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Connected all rings +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua055:3866104:3866182 [1] NCCL INFO Connected all trees +gpua055:3866104:3866182 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866104:3866182 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866104:3866182 [1] NCCL INFO comm 0x4ff24650 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua055:3866103:3866103 [0] NCCL INFO cudaDriverVersion 12010 +gpua055:3866103:3866103 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866103:3866103 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866103:3866183 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866103:3866183 [0] NCCL INFO Using network IB +gpua055:3866103:3866183 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua055:3866103:3866183 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua055:3866103:3866183 [0] NCCL INFO Connected all rings +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpua055:3866103:3866183 
[0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpua055:3866103:3866183 [0] NCCL INFO Connected all trees +gpua055:3866103:3866183 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866103:3866183 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866103:3866183 [0] NCCL INFO comm 0x8783410 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua055:3866105:3866105 [2] NCCL INFO cudaDriverVersion 12010 +gpua055:3866105:3866105 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> +gpua055:3866105:3866105 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua055:3866105:3866181 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> +gpua055:3866105:3866181 [2] NCCL INFO Using network IB +gpua055:3866105:3866181 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua055:3866105:3866181 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Connected all rings +gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua055:3866105:3866181 [2] NCCL INFO Connected all trees +gpua055:3866105:3866181 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua055:3866105:3866181 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua055:3866105:3866181 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua098:2101209:2101209 [1] NCCL INFO cudaDriverVersion 12010 +gpua098:2101209:2101209 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101209:2101209 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua098:2101209:2101288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101209:2101288 [1] NCCL INFO Using network IB +gpua098:2101209:2101288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua098:2101209:2101288 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Connected all rings +gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua098:2101209:2101288 [1] NCCL INFO Connected all trees +gpua098:2101209:2101288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101209:2101288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101209:2101288 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua098:2101208:2101208 [0] NCCL INFO cudaDriverVersion 12010 +gpua098:2101208:2101208 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101208:2101208 [0] NCCL INFO NET/Plugin : No 
plugin found (libnccl-net.so), using internal implementation +gpua098:2101208:2101291 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101208:2101291 [0] NCCL INFO Using network IB +gpua098:2101208:2101291 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua098:2101208:2101291 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua098:2101208:2101291 [0] NCCL INFO Connected all rings +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpua098:2101208:2101291 [0] NCCL INFO Connected all trees +gpua098:2101208:2101291 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101208:2101291 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101208:2101291 [0] NCCL INFO comm 0x8ba9dc20 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua098:2101210:2101210 [2] NCCL INFO cudaDriverVersion 12010 +gpua098:2101210:2101210 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101210:2101210 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua098:2101210:2101290 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101210:2101290 [2] NCCL INFO Using network IB +gpua098:2101210:2101290 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua098:2101210:2101290 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Connected all rings +gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua098:2101210:2101290 [2] NCCL INFO Connected all trees +gpua098:2101210:2101290 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101210:2101290 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101210:2101290 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua090:2294100:2294100 [3] NCCL INFO cudaDriverVersion 12010 +gpua090:2294100:2294100 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0> +gpua090:2294100:2294100 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294100:2294189 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294100:2294189 [3] NCCL INFO Using network IB +gpua090:2294100:2294189 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua090:2294100:2294189 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpua090:2294100:2294189 [3] NCCL INFO 
Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua090:2294100:2294189 [3] NCCL INFO Connected all rings +gpua090:2294100:2294189 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua090:2294100:2294189 [3] NCCL INFO Connected all trees +gpua090:2294100:2294189 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294100:2294189 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294100:2294189 [3] NCCL INFO comm 0x8d2a2250 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua090:2294099:2294099 [2] NCCL INFO cudaDriverVersion 12010 +gpua090:2294099:2294099 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0> +gpua090:2294099:2294099 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294099:2294186 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294099:2294186 [2] NCCL INFO Using network IB +gpua090:2294099:2294186 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua090:2294099:2294186 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Connected all rings +gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua090:2294099:2294186 [2] NCCL INFO Connected all trees +gpua090:2294099:2294186 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294099:2294186 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294099:2294186 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua003:350634:350634 [1] NCCL INFO cudaDriverVersion 12010 +gpua003:350634:350634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0> +gpua003:350634:350634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua003:350634:350707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350634:350707 [1] NCCL INFO Using network IB +gpua003:350634:350707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua003:350634:350707 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Connected all rings +gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua003:350634:350707 [1] NCCL INFO Connected all trees +gpua003:350634:350707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350634:350707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350634:350707 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua090:2294098:2294098 [1] NCCL INFO cudaDriverVersion 12010 +gpua090:2294098:2294098 [1] NCCL INFO Bootstrap : 
Using eth1:172.28.23.90<0> +gpua090:2294098:2294098 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294098:2294187 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294098:2294187 [1] NCCL INFO Using network IB +gpua090:2294098:2294187 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua090:2294098:2294187 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Connected all rings +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua090:2294098:2294187 [1] NCCL INFO Connected all trees +gpua090:2294098:2294187 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294098:2294187 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294098:2294187 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua098:2101211:2101211 [3] NCCL INFO cudaDriverVersion 12010 +gpua098:2101211:2101211 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0> +gpua098:2101211:2101211 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua098:2101211:2101289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0> +gpua098:2101211:2101289 [3] NCCL INFO Using network IB +gpua098:2101211:2101289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua098:2101211:2101289 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua098:2101211:2101289 [3] NCCL INFO Connected all rings +gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua098:2101211:2101289 [3] NCCL INFO Connected all trees +gpua098:2101211:2101289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua098:2101211:2101289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua098:2101211:2101289 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua003:350636:350636 [3] NCCL INFO cudaDriverVersion 12010 +gpua003:350636:350636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0> +gpua003:350636:350636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua003:350636:350708 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350636:350708 [3] NCCL INFO Using network IB +gpua003:350636:350708 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua003:350636:350708 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via 
NET/IB/0 +gpua003:350636:350708 [3] NCCL INFO Connected all rings +gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua003:350636:350708 [3] NCCL INFO Connected all trees +gpua003:350636:350708 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350636:350708 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350636:350708 [3] NCCL INFO comm 0x8b901f80 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua010:1622002:1622002 [2] NCCL INFO cudaDriverVersion 12010 +gpua010:1622002:1622002 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622002:1622002 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622002:1622073 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622002:1622073 [2] NCCL INFO Using network IB +gpua010:1622002:1622073 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua010:1622002:1622073 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Connected all rings +gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua010:1622002:1622073 [2] NCCL INFO Connected all trees +gpua010:1622002:1622073 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622002:1622073 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622002:1622073 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua003:350633:350706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350633:350706 [0] NCCL INFO Using network IB +gpua003:350633:350706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua003:350633:350706 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua003:350633:350706 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua003:350633:350706 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua003:350633:350706 [0] NCCL INFO Connected all rings +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpua003:350633:350706 [0] NCCL INFO Connected all trees +gpua003:350633:350706 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350633:350706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350633:350706 [0] NCCL INFO comm 
0x505c0d10 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua003:350635:350635 [2] NCCL INFO cudaDriverVersion 12010 +gpua003:350635:350635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0> +gpua003:350635:350635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua003:350635:350709 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0> +gpua003:350635:350709 [2] NCCL INFO Using network IB +gpua003:350635:350709 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua003:350635:350709 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Connected all rings +gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua003:350635:350709 [2] NCCL INFO Connected all trees +gpua003:350635:350709 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua003:350635:350709 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua003:350635:350709 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua090:2294097:2294097 [0] NCCL INFO cudaDriverVersion 12010 +gpua090:2294097:2294097 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0> +gpua090:2294097:2294097 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua090:2294097:2294188 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0> +gpua090:2294097:2294188 [0] NCCL INFO Using network IB +gpua090:2294097:2294188 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua090:2294097:2294188 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua090:2294097:2294188 [0] NCCL INFO Connected all rings +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpua090:2294097:2294188 [0] NCCL INFO Connected all trees +gpua090:2294097:2294188 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua090:2294097:2294188 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua090:2294097:2294188 [0] NCCL INFO comm 0x4ed27c50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua010:1622003:1622003 [3] NCCL INFO cudaDriverVersion 12010 +gpua010:1622003:1622003 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622003:1622003 [3] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpua010:1622003:1622076 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622003:1622076 [3] NCCL INFO Using network IB +gpua010:1622003:1622076 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua010:1622003:1622076 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua010:1622003:1622076 [3] NCCL INFO Connected all rings +gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua010:1622003:1622076 [3] NCCL INFO Connected all trees +gpua010:1622003:1622076 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622003:1622076 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622003:1622076 [3] NCCL INFO comm 0x9c22310 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua010:1622000:1622000 [0] NCCL INFO cudaDriverVersion 12010 +gpua010:1622000:1622000 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622000:1622000 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622000:1622074 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622000:1622074 [0] NCCL INFO Using network IB +gpua010:1622000:1622074 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua010:1622000:1622074 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua010:1622000:1622074 [0] NCCL INFO Connected all rings +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Connected all trees +gpua010:1622000:1622074 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622000:1622074 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622000:1622074 [0] NCCL INFO comm 0xc2d78fd0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua010:1622001:1622001 [1] NCCL INFO cudaDriverVersion 12010 +gpua010:1622001:1622001 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622001:1622001 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622001:1622075 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622001:1622075 [1] NCCL INFO Using network IB +gpua010:1622001:1622075 [1] 
NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua010:1622001:1622075 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all rings +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all trees +gpua010:1622001:1622075 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622001:1622075 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622001:1622075 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua060:2854971:2854971 [3] NCCL INFO cudaDriverVersion 12010 +gpua060:2854971:2854971 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854971:2854971 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854971:2855041 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854971:2855041 [3] NCCL INFO Using network IB +gpua060:2854971:2855041 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:2854971:2855041 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Connected all rings +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua025:63838:63838 [2] NCCL INFO cudaDriverVersion 12010 +gpua025:63838:63838 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63838:63838 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63838:63912 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63838:63912 [2] NCCL INFO Using network IB +gpua025:63838:63912 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua025:63838:63912 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpua025:63838:63912 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all rings +gpua025:63838:63912 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854971:2855041 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854971:2855041 [3] NCCL INFO comm 0xb6f9a6a0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua025:63838:63912 [2] NCCL INFO threadThresholds 8/8/64 | 
+gpua025:63838:63912 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63838:63912 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63838:63912 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua025:63837:63837 [1] NCCL INFO cudaDriverVersion 12010
+gpua025:63837:63837 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0>
+gpua025:63837:63837 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua025:63837:63913 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0>
+gpua025:63837:63913 [1] NCCL INFO Using network IB
+gpua025:63837:63913 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua025:63837:63913 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Connected all rings
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua025:63837:63913 [1] NCCL INFO Connected all trees
+gpua025:63837:63913 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63837:63913 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63837:63913 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua025:63839:63839 [3] NCCL INFO cudaDriverVersion 12010
+gpua025:63839:63839 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0>
+gpua025:63839:63839 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua025:63839:63914 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0>
+gpua025:63839:63914 [3] NCCL INFO Using network IB
+gpua025:63839:63914 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua025:63839:63914 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua025:63839:63914 [3] NCCL INFO Connected all rings
+gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua025:63839:63914 [3] NCCL INFO Connected all trees
+gpua025:63839:63914 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63839:63914 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63839:63914 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua060:2854968:2854968 [0] NCCL INFO cudaDriverVersion 12010
+gpua060:2854968:2854968 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2854968:2854968 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2854968:2855043 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2854968:2855043 [0] NCCL INFO Using network IB
+gpua060:2854968:2855043 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua060:2854968:2855043 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua060:2854968:2855043 [0] NCCL INFO Connected all rings
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpua060:2854968:2855043 [0] NCCL INFO Connected all trees
+gpua060:2854968:2855043 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2854968:2855043 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2854968:2855043 [0] NCCL INFO comm 0x9da77350 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua060:2854970:2854970 [2] NCCL INFO cudaDriverVersion 12010
+gpua060:2854970:2854970 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2854970:2854970 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2854970:2855044 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2854970:2855044 [2] NCCL INFO Using network IB
+gpua060:2854970:2855044 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua060:2854970:2855044 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Connected all rings
+gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua060:2854970:2855044 [2] NCCL INFO Connected all trees
+gpua060:2854970:2855044 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2854970:2855044 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2854970:2855044 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua025:63836:63836 [0] NCCL INFO cudaDriverVersion 12010
+gpua025:63836:63836 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0>
+gpua025:63836:63836 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua025:63836:63915 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0>
+gpua025:63836:63915 [0] NCCL INFO Using network IB
+gpua025:63836:63915 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua025:63836:63915 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua025:63836:63915 [0] NCCL INFO Connected all rings
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpua025:63836:63915 [0] NCCL INFO Connected all trees
+gpua025:63836:63915 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua025:63836:63915 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua025:63836:63915 [0] NCCL INFO comm 0x1772ec20 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua060:2854969:2854969 [1] NCCL INFO cudaDriverVersion 12010
+gpua060:2854969:2854969 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2854969:2854969 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2854969:2855042 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2854969:2855042 [1] NCCL INFO Using network IB
+gpua060:2854969:2855042 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua060:2854969:2855042 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Connected all rings
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua060:2854969:2855042 [1] NCCL INFO Connected all trees
+gpua060:2854969:2855042 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2854969:2855042 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2854969:2855042 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua005:322787:322787 [2] NCCL INFO cudaDriverVersion 12010
+gpua005:322787:322787 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322787:322787 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322787:322863 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322787:322863 [2] NCCL INFO Using network IB
+gpua005:322787:322863 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua005:322787:322863 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Connected all rings
+gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua005:322787:322863 [2] NCCL INFO Connected all trees
+gpua005:322787:322863 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322787:322863 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322787:322863 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua005:322788:322788 [3] NCCL INFO cudaDriverVersion 12010
+gpua005:322788:322788 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322788:322788 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322788:322860 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322788:322860 [3] NCCL INFO Using network IB
+gpua005:322788:322860 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua005:322788:322860 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpua005:322788:322860 [3] NCCL INFO Connected all rings
+gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua005:322788:322860 [3] NCCL INFO Connected all trees
+gpua005:322788:322860 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322788:322860 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322788:322860 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua005:322785:322785 [0] NCCL INFO cudaDriverVersion 12010
+gpua005:322785:322785 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322785:322785 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322785:322861 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322785:322861 [0] NCCL INFO Using network IB
+gpua005:322785:322861 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua005:322785:322861 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua005:322785:322861 [0] NCCL INFO Connected all rings
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpua005:322785:322861 [0] NCCL INFO Connected all trees
+gpua005:322785:322861 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322785:322861 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322785:322861 [0] NCCL INFO comm 0xbdcfe00 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua005:322786:322786 [1] NCCL INFO cudaDriverVersion 12010
+gpua005:322786:322786 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0>
+gpua005:322786:322786 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua005:322786:322862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0>
+gpua005:322786:322862 [1] NCCL INFO Using network IB
+gpua005:322786:322862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua005:322786:322862 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Connected all rings
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua005:322786:322862 [1] NCCL INFO Connected all trees
+gpua005:322786:322862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua005:322786:322862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua005:322786:322862 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua035:1685218:1685218 [2] NCCL INFO cudaDriverVersion 12010
+gpua035:1685218:1685218 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685218:1685218 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685218:1685292 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685218:1685292 [2] NCCL INFO Using network IB
+gpua035:1685218:1685292 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua035:1685218:1685292 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Connected all rings
+gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua035:1685218:1685292 [2] NCCL INFO Connected all trees
+gpua035:1685218:1685292 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685218:1685292 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685218:1685292 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua035:1685217:1685217 [1] NCCL INFO cudaDriverVersion 12010
+gpua035:1685217:1685217 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685217:1685217 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685217:1685295 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685217:1685295 [1] NCCL INFO Using network IB
+gpua035:1685217:1685295 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua035:1685217:1685295 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Connected all rings
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua035:1685217:1685295 [1] NCCL INFO Connected all trees
+gpua035:1685217:1685295 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685217:1685295 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685217:1685295 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua035:1685219:1685219 [3] NCCL INFO cudaDriverVersion 12010
+gpua035:1685219:1685219 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685219:1685219 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685219:1685293 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685219:1685293 [3] NCCL INFO Using network IB
+gpua035:1685219:1685293 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua035:1685219:1685293 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpua035:1685219:1685293 [3] NCCL INFO Connected all rings
+gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua035:1685219:1685293 [3] NCCL INFO Connected all trees
+gpua035:1685219:1685293 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685219:1685293 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685219:1685293 [3] NCCL INFO comm 0x9d08f8e0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua035:1685216:1685216 [0] NCCL INFO cudaDriverVersion 12010
+gpua035:1685216:1685216 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:1685216:1685216 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:1685216:1685294 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:1685216:1685294 [0] NCCL INFO Using network IB
+gpua035:1685216:1685294 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua035:1685216:1685294 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua035:1685216:1685294 [0] NCCL INFO Connected all rings
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpua035:1685216:1685294 [0] NCCL INFO Connected all trees
+gpua035:1685216:1685294 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:1685216:1685294 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:1685216:1685294 [0] NCCL INFO comm 0x8b5a90d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
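Editor's note: the reducer.cpp warning above is printed once per DDP process (the job runs 64 ranks) and refers to the find_unused_parameters flag of torch.nn.parallel.DistributedDataParallel. A minimal sketch of the setup it points at, with illustrative names rather than ESPnet's actual wrapper code:

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_model(model: torch.nn.Module, local_gpu: int) -> DDP:
        # NCCL is the backend whose rank/ring/tree setup is logged above;
        # init_method is illustrative (this job supplies its own file:// URL).
        dist.init_process_group(backend="nccl", init_method="env://")
        model = model.cuda(local_gpu)
        # find_unused_parameters=True makes DDP walk the autograd graph every
        # iteration looking for parameters that received no gradient; the
        # warning suggests passing False when all parameters are always used.
        return DDP(model, device_ids=[local_gpu], find_unused_parameters=False)

Whether the flag can really be disabled depends on the model: with flow control (e.g. the optional text_prev branch), some iterations may legitimately leave parameters unused, which is exactly the false-positive case the warning text mentions.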
+[gpua003:0/64] 2023-07-05 22:46:27,131 (trainer:732) INFO: 14epoch:train:1-100batch: iter_time=1.256, forward_time=0.181, loss_ctc=67.478, loss_att=50.061, acc=0.683, loss=55.286, backward_time=0.765, grad_norm=84.127, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.113, optim0_lr0=1.020e-04, train_time=5.776
+[gpua003:0/64] 2023-07-05 22:48:06,129 (trainer:732) INFO: 14epoch:train:101-200batch: iter_time=1.036e-04, forward_time=0.104, loss_ctc=76.315, loss_att=60.252, acc=0.659, loss=65.071, backward_time=0.747, grad_norm=106.131, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.020e-04, train_time=1.980
+[gpua003:0/64] 2023-07-05 22:49:44,933 (trainer:732) INFO: 14epoch:train:201-300batch: iter_time=1.112e-04, forward_time=0.104, loss_ctc=71.342, loss_att=53.820, acc=0.681, loss=59.077, backward_time=0.744, grad_norm=88.859, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.020e-04, train_time=1.976
+[gpua003:0/64] 2023-07-05 22:51:23,717 (trainer:732) INFO: 14epoch:train:301-400batch: iter_time=1.061e-04, forward_time=0.104, loss_ctc=74.278, loss_att=54.336, acc=0.672, loss=60.318, backward_time=0.744, grad_norm=83.344, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.975
+[gpua003:0/64] 2023-07-05 22:53:02,383 (trainer:732) INFO: 14epoch:train:401-500batch: iter_time=9.902e-05, forward_time=0.104, loss_ctc=73.819, loss_att=59.568, acc=0.675, loss=63.843, backward_time=0.745, grad_norm=90.212, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.973
+[gpua003:0/64] 2023-07-05 22:54:41,341 (trainer:732) INFO: 14epoch:train:501-600batch: iter_time=9.856e-05, forward_time=0.105, loss_ctc=67.201, loss_att=54.515, acc=0.666, loss=58.321, backward_time=0.746, grad_norm=85.901, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.979
+[gpua003:0/64] 2023-07-05 22:56:20,255 (trainer:732) INFO: 14epoch:train:601-700batch: iter_time=9.906e-05, forward_time=0.105, loss_ctc=79.513, loss_att=65.577, acc=0.663, loss=69.758, backward_time=0.745, grad_norm=91.560, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.978
+[gpua003:0/64] 2023-07-05 22:58:13,995 (trainer:732) INFO: 14epoch:train:701-800batch: iter_time=1.052e-04, forward_time=0.104, loss_ctc=86.261, loss_att=57.434, acc=0.687, loss=66.082, backward_time=0.756, grad_norm=111.598, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=2.275
+[gpua003:0/64] 2023-07-05 22:59:03,798 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
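Editor's note: in the (trainer:732) records above, the reported loss is consistent with a fixed interpolation of the CTC and attention objectives, loss = 0.3 * loss_ctc + 0.7 * loss_att (for the 1-100batch record: 0.3 * 67.478 + 0.7 * 50.061 = 55.286). The 0.3 CTC weight is inferred from these numbers, not read from the training config; a quick check:

    # Sanity-check the loss interpolation against the first record above.
    # The 0.3 CTC weight is an assumption recovered from the logged values.
    ctc_weight = 0.3
    loss_ctc, loss_att = 67.478, 50.061
    loss = ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att
    print(f"{loss:.3f}")  # 55.286, matching loss= in the log line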
+[gpua003:0/64] 2023-07-05 22:59:22,487 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:59:25,999 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 22:59:26,000 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-05 22:59:26,006 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-05 23:04:47,145 (trainer:732) INFO: 14epoch:train:801-900batch: iter_time=1.366, forward_time=0.106, loss_ctc=79.493, loss_att=56.962, acc=0.683, loss=63.721, backward_time=0.769, grad_norm=96.755, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=7.863
+[gpua003:0/64] 2023-07-05 23:06:27,057 (trainer:732) INFO: 14epoch:train:901-1000batch: iter_time=1.071e-04, forward_time=0.107, loss_ctc=77.587, loss_att=65.112, acc=0.669, loss=68.855, backward_time=0.749, grad_norm=96.509, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=1.998
+[gpua003:0/64] 2023-07-05 23:08:06,608 (trainer:732) INFO: 14epoch:train:1001-1100batch: iter_time=1.219e-04, forward_time=0.108, loss_ctc=69.740, loss_att=52.983, acc=0.695, loss=58.010, backward_time=0.747, grad_norm=83.010, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.991
+[gpua003:0/64] 2023-07-05 23:09:46,058 (trainer:732) INFO: 14epoch:train:1101-1200batch: iter_time=9.900e-05, forward_time=0.107, loss_ctc=71.265, loss_att=51.955, acc=0.681, loss=57.748, backward_time=0.747, grad_norm=86.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.989
+[gpua003:0/64] 2023-07-05 23:11:25,939 (trainer:732) INFO: 14epoch:train:1201-1300batch: iter_time=1.090e-04, forward_time=0.107, loss_ctc=72.245, loss_att=58.430, acc=0.682, loss=62.574, backward_time=0.748, grad_norm=82.034, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.997
+[gpua003:0/64] 2023-07-05 23:13:05,201 (trainer:732) INFO: 14epoch:train:1301-1400batch: iter_time=1.137e-04, forward_time=0.106, loss_ctc=68.613, loss_att=56.255, acc=0.673, loss=59.963, backward_time=0.746, grad_norm=90.540, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.985
+[gpua003:0/64] 2023-07-05 23:14:44,608 (trainer:732) INFO: 14epoch:train:1401-1500batch: iter_time=1.156e-04, forward_time=0.107, loss_ctc=75.103, loss_att=62.936, acc=0.675, loss=66.586, backward_time=0.747, grad_norm=91.101, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.988
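Editor's note: the speech entries in the [train] dataset blocks above are Kaldi-style script/archive pairs (a wav.scp split pointing into ark files). A minimal way to inspect one split outside the training loop, assuming the kaldiio package that ESPnet uses for kaldi_ark I/O and that the archived entries are waveforms, for which kaldiio yields (rate, array) pairs:

    import kaldiio

    # Lazy mapping from utterance id to (sample_rate, waveform ndarray);
    # nothing is read from the ark files until an entry is accessed.
    wavs = kaldiio.load_scp("exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9")
    utt_id = next(iter(wavs))
    rate, audio = wavs[utt_id]
    print(utt_id, rate, audio.shape)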
+[gpua003:0/64] 2023-07-05 23:16:23,886 (trainer:732) INFO: 14epoch:train:1501-1600batch: iter_time=1.146e-04, forward_time=0.107, loss_ctc=85.725, loss_att=59.152, acc=0.686, loss=67.124, backward_time=0.748, grad_norm=323.443, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.985
+[gpua003:0/64] 2023-07-05 23:17:31,811 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-05 23:17:50,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 23:17:54,347 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 23:17:54,348 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-05 23:17:54,354 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-05 23:22:17,852 (trainer:732) INFO: 14epoch:train:1601-1700batch: iter_time=1.315, forward_time=0.107, loss_ctc=91.275, loss_att=63.464, acc=0.682, loss=71.807, backward_time=0.761, grad_norm=113.989, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=7.079
+[gpua003:0/64] 2023-07-05 23:23:57,809 (trainer:732) INFO: 14epoch:train:1701-1800batch: iter_time=1.095e-04, forward_time=0.106, loss_ctc=65.126, loss_att=52.331, acc=0.668, loss=56.169, backward_time=0.746, grad_norm=83.364, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.999
+[gpua003:0/64] 2023-07-05 23:25:37,561 (trainer:732) INFO: 14epoch:train:1801-1900batch: iter_time=1.136e-04, forward_time=0.105, loss_ctc=79.242, loss_att=61.858, acc=0.676, loss=67.073, backward_time=0.744, grad_norm=97.701, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.995
+[gpua003:0/64] 2023-07-05 23:27:17,066 (trainer:732) INFO: 14epoch:train:1901-2000batch: iter_time=1.351e-04, forward_time=0.108, loss_ctc=65.033, loss_att=46.741, acc=0.696, loss=52.229, backward_time=0.748, grad_norm=72.732, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.990
+[gpua003:0/64] 2023-07-05 23:28:56,259 (trainer:732) INFO: 14epoch:train:2001-2100batch: iter_time=1.376e-04, forward_time=0.107, loss_ctc=74.297, loss_att=55.693, acc=0.679, loss=61.274, backward_time=0.747, grad_norm=81.546, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.012e-04, train_time=1.984
+[gpua003:0/64] 2023-07-05 23:30:35,698 (trainer:732) INFO: 14epoch:train:2101-2200batch: iter_time=1.153e-04, forward_time=0.107, loss_ctc=69.547, loss_att=58.660, acc=0.668, loss=61.926, backward_time=0.747, grad_norm=98.590, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.989
+[gpua003:0/64] 2023-07-05 23:32:14,941 (trainer:732) INFO: 14epoch:train:2201-2300batch: iter_time=1.191e-04, forward_time=0.105, loss_ctc=70.991, loss_att=61.134, acc=0.665, loss=64.091, backward_time=0.745, grad_norm=88.986, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.011e-04, train_time=1.985
loss=62.450, backward_time=0.744, grad_norm=94.472, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.011e-04, train_time=1.984 +[gpua003:0/64] 2023-07-05 23:35:33,588 (trainer:732) INFO: 14epoch:train:2401-2500batch: iter_time=1.069e-04, forward_time=0.106, loss_ctc=89.577, loss_att=66.586, acc=0.668, loss=73.483, backward_time=0.746, grad_norm=115.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.010e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:35:35,877 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-05 23:35:54,848 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-05 23:35:58,374 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:42:19,890 (trainer:732) INFO: 14epoch:train:2501-2600batch: iter_time=1.276, forward_time=0.106, loss_ctc=65.318, loss_att=47.624, acc=0.706, loss=52.932, backward_time=0.758, grad_norm=76.861, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=8.126 +[gpua003:0/64] 2023-07-05 23:44:00,174 (trainer:732) INFO: 14epoch:train:2601-2700batch: iter_time=1.018e-04, forward_time=0.106, loss_ctc=74.292, loss_att=60.131, acc=0.672, loss=64.379, backward_time=0.748, grad_norm=91.235, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=2.005 +[gpua003:0/64] 2023-07-05 23:45:39,635 (trainer:732) INFO: 14epoch:train:2701-2800batch: iter_time=9.909e-05, forward_time=0.106, loss_ctc=70.460, loss_att=53.646, acc=0.696, loss=58.690, backward_time=0.747, grad_norm=76.382, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:47:18,986 (trainer:732) INFO: 14epoch:train:2801-2900batch: iter_time=9.455e-05, forward_time=0.107, loss_ctc=72.107, loss_att=52.607, acc=0.679, loss=58.457, backward_time=0.746, grad_norm=92.055, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:48:58,340 (trainer:732) INFO: 14epoch:train:2901-3000batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=71.947, loss_att=58.354, acc=0.687, loss=62.432, backward_time=0.747, grad_norm=87.978, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:50:37,583 (trainer:732) INFO: 14epoch:train:3001-3100batch: iter_time=1.174e-04, forward_time=0.106, loss_ctc=67.838, loss_att=54.060, acc=0.680, loss=58.194, backward_time=0.746, grad_norm=87.646, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:52:16,968 (trainer:732) 
INFO: 14epoch:train:3101-3200batch: iter_time=9.872e-05, forward_time=0.106, loss_ctc=78.444, loss_att=65.330, acc=0.674, loss=69.264, backward_time=0.747, grad_norm=89.372, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:53:56,336 (trainer:732) INFO: 14epoch:train:3201-3300batch: iter_time=1.046e-04, forward_time=0.106, loss_ctc=84.969, loss_att=57.606, acc=0.694, loss=65.815, backward_time=0.746, grad_norm=110.526, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:54:31,552 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-05 23:54:50,698 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-05 23:54:54,277 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:59:43,333 (trainer:732) INFO: 14epoch:train:3301-3400batch: iter_time=1.303, forward_time=0.146, loss_ctc=74.612, loss_att=55.066, acc=0.684, loss=60.930, backward_time=0.760, grad_norm=98.484, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.007e-04, train_time=6.939 +[gpua003:0/64] 2023-07-06 00:01:23,508 (trainer:732) INFO: 14epoch:train:3401-3500batch: iter_time=9.903e-05, forward_time=0.106, loss_ctc=79.257, loss_att=64.273, acc=0.665, loss=68.768, backward_time=0.747, grad_norm=97.943, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=2.004 +[gpua003:0/64] 2023-07-06 00:03:02,774 (trainer:732) INFO: 14epoch:train:3501-3600batch: iter_time=1.067e-04, forward_time=0.105, loss_ctc=69.162, loss_att=52.606, acc=0.690, loss=57.572, backward_time=0.744, grad_norm=91.125, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:04:42,112 (trainer:732) INFO: 14epoch:train:3601-3700batch: iter_time=1.060e-04, forward_time=0.105, loss_ctc=67.528, loss_att=48.483, acc=0.692, loss=54.197, backward_time=0.746, grad_norm=77.302, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.005e-04, train_time=1.987 +[gpua003:0/64] 2023-07-06 00:06:21,444 (trainer:732) INFO: 14epoch:train:3701-3800batch: iter_time=1.056e-04, forward_time=0.105, loss_ctc=70.824, loss_att=56.721, acc=0.686, loss=60.952, backward_time=0.745, grad_norm=88.267, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:08:00,672 (trainer:732) INFO: 14epoch:train:3801-3900batch: iter_time=1.154e-04, forward_time=0.106, loss_ctc=64.923, loss_att=53.550, acc=0.673, loss=56.962, backward_time=0.746, grad_norm=87.728, clip=100.000, 
loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:09:40,070 (trainer:732) INFO: 14epoch:train:3901-4000batch: iter_time=1.270e-04, forward_time=0.106, loss_ctc=73.750, loss_att=61.788, acc=0.673, loss=65.377, backward_time=0.746, grad_norm=99.456, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.004e-04, train_time=1.988 +[gpua003:0/64] 2023-07-06 00:11:19,231 (trainer:732) INFO: 14epoch:train:4001-4100batch: iter_time=1.074e-04, forward_time=0.105, loss_ctc=83.056, loss_att=57.768, acc=0.689, loss=65.354, backward_time=0.744, grad_norm=118.212, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.004e-04, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:12:39,831 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 00:12:59,113 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 00:13:02,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:16:52,797 (trainer:732) INFO: 14epoch:train:4101-4200batch: iter_time=2.223, forward_time=0.105, loss_ctc=86.387, loss_att=61.491, acc=0.682, loss=68.960, backward_time=0.756, grad_norm=109.255, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.003e-04, train_time=6.671 +[gpua003:0/64] 2023-07-06 00:18:32,838 (trainer:732) INFO: 14epoch:train:4201-4300batch: iter_time=9.583e-05, forward_time=0.105, loss_ctc=66.113, loss_att=52.338, acc=0.676, loss=56.471, backward_time=0.749, grad_norm=87.547, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=2.001 +[gpua003:0/64] 2023-07-06 00:20:12,105 (trainer:732) INFO: 14epoch:train:4301-4400batch: iter_time=9.179e-05, forward_time=0.105, loss_ctc=74.526, loss_att=56.969, acc=0.689, loss=62.236, backward_time=0.745, grad_norm=94.528, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:21:52,782 (trainer:732) INFO: 14epoch:train:4401-4500batch: iter_time=1.001e-04, forward_time=0.105, loss_ctc=67.853, loss_att=50.469, acc=0.683, loss=55.684, backward_time=0.746, grad_norm=71.313, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.013 +[gpua003:0/64] 2023-07-06 00:23:34,997 (trainer:732) INFO: 14epoch:train:4501-4600batch: iter_time=1.021e-04, forward_time=0.105, loss_ctc=70.434, loss_att=50.423, acc=0.692, loss=56.426, backward_time=0.747, grad_norm=81.773, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.044 +[gpua003:0/64] 2023-07-06 00:25:14,224 (trainer:732) INFO: 14epoch:train:4601-4700batch: iter_time=9.233e-05, 
forward_time=0.104, loss_ctc=72.767, loss_att=60.956, acc=0.675, loss=64.499, backward_time=0.745, grad_norm=91.196, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:26:53,508 (trainer:732) INFO: 14epoch:train:4701-4800batch: iter_time=9.383e-05, forward_time=0.105, loss_ctc=67.376, loss_att=54.770, acc=0.670, loss=58.552, backward_time=0.746, grad_norm=96.216, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.001e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:28:32,809 (trainer:732) INFO: 14epoch:train:4801-4900batch: iter_time=1.024e-04, forward_time=0.105, loss_ctc=78.619, loss_att=59.814, acc=0.681, loss=65.456, backward_time=0.745, grad_norm=92.282, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:30:12,054 (trainer:732) INFO: 14epoch:train:4901-5000batch: iter_time=1.019e-04, forward_time=0.106, loss_ctc=85.585, loss_att=63.122, acc=0.681, loss=69.861, backward_time=0.744, grad_norm=103.405, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:30:14,272 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 00:30:33,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 00:30:36,757 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:35:36,135 (trainer:732) INFO: 14epoch:train:5001-5100batch: iter_time=1.279, forward_time=0.105, loss_ctc=64.965, loss_att=47.311, acc=0.710, loss=52.607, backward_time=0.755, grad_norm=80.711, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.998e-05, train_time=6.481 +[gpua003:0/64] 2023-07-06 00:37:16,313 (trainer:732) INFO: 14epoch:train:5101-5200batch: iter_time=1.019e-04, forward_time=0.105, loss_ctc=71.440, loss_att=58.275, acc=0.679, loss=62.225, backward_time=0.746, grad_norm=92.300, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.994e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 00:38:55,753 (trainer:732) INFO: 14epoch:train:5201-5300batch: iter_time=9.692e-05, forward_time=0.106, loss_ctc=68.311, loss_att=51.470, acc=0.704, loss=56.522, backward_time=0.746, grad_norm=109.860, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.990e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 00:40:35,087 (trainer:732) INFO: 14epoch:train:5301-5400batch: iter_time=9.289e-05, forward_time=0.106, loss_ctc=71.699, loss_att=52.171, acc=0.683, loss=58.029, backward_time=0.746, grad_norm=87.463, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.986e-05, 
train_time=1.986 +[gpua003:0/64] 2023-07-06 00:42:14,290 (trainer:732) INFO: 14epoch:train:5401-5500batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=71.095, loss_att=58.813, acc=0.688, loss=62.498, backward_time=0.744, grad_norm=80.801, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.982e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:43:53,453 (trainer:732) INFO: 14epoch:train:5501-5600batch: iter_time=1.011e-04, forward_time=0.105, loss_ctc=65.967, loss_att=52.829, acc=0.683, loss=56.770, backward_time=0.743, grad_norm=80.073, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.978e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:45:32,694 (trainer:732) INFO: 14epoch:train:5601-5700batch: iter_time=9.394e-05, forward_time=0.105, loss_ctc=76.086, loss_att=65.998, acc=0.677, loss=69.024, backward_time=0.745, grad_norm=84.296, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.974e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:47:11,756 (trainer:732) INFO: 14epoch:train:5701-5800batch: iter_time=1.005e-04, forward_time=0.104, loss_ctc=80.981, loss_att=57.081, acc=0.692, loss=64.251, backward_time=0.745, grad_norm=117.230, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.970e-05, train_time=1.981 +[gpua003:0/64] 2023-07-06 00:47:46,785 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 00:48:05,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 00:48:09,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:51:56,399 (trainer:732) INFO: 14epoch:train:5801-5900batch: iter_time=1.268, forward_time=0.105, loss_ctc=70.851, loss_att=51.948, acc=0.699, loss=57.619, backward_time=0.758, grad_norm=95.312, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.966e-05, train_time=5.693 +[gpua003:0/64] 2023-07-06 00:53:36,326 (trainer:732) INFO: 14epoch:train:5901-6000batch: iter_time=9.412e-05, forward_time=0.105, loss_ctc=74.049, loss_att=61.596, acc=0.687, loss=65.332, backward_time=0.745, grad_norm=83.262, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.962e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 00:55:15,556 (trainer:732) INFO: 14epoch:train:6001-6100batch: iter_time=9.442e-05, forward_time=0.105, loss_ctc=72.845, loss_att=54.112, acc=0.692, loss=59.732, backward_time=0.744, grad_norm=84.620, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.958e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:56:54,727 (trainer:732) INFO: 14epoch:train:6101-6200batch: iter_time=9.749e-05, forward_time=0.106, loss_ctc=62.931, loss_att=46.045, acc=0.705, loss=51.111, 
backward_time=0.744, grad_norm=75.046, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.954e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:58:34,088 (trainer:732) INFO: 14epoch:train:6201-6300batch: iter_time=9.675e-05, forward_time=0.106, loss_ctc=75.966, loss_att=57.343, acc=0.689, loss=62.930, backward_time=0.745, grad_norm=93.914, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.950e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:00:28,991 (trainer:732) INFO: 14epoch:train:6301-6400batch: iter_time=9.513e-05, forward_time=0.105, loss_ctc=65.892, loss_att=55.960, acc=0.686, loss=58.940, backward_time=0.769, grad_norm=98.285, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.946e-05, train_time=2.298 +[gpua003:0/64] 2023-07-06 01:02:08,399 (trainer:732) INFO: 14epoch:train:6401-6500batch: iter_time=9.649e-05, forward_time=0.106, loss_ctc=71.103, loss_att=59.408, acc=0.685, loss=62.916, backward_time=0.746, grad_norm=81.143, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.942e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:03:51,462 (trainer:732) INFO: 14epoch:train:6501-6600batch: iter_time=1.032e-04, forward_time=0.106, loss_ctc=81.907, loss_att=58.088, acc=0.700, loss=65.234, backward_time=0.749, grad_norm=98.608, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.938e-05, train_time=2.061 +[gpua003:0/64] 2023-07-06 01:04:59,329 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 01:05:18,889 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:05:22,372 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:05:22,373 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 01:05:22,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:10:37,392 (trainer:732) INFO: 14epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.107, loss_ctc=81.131, loss_att=58.478, acc=0.688, loss=65.274, backward_time=0.755, grad_norm=114.707, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.935e-05, train_time=8.118 +[gpua003:0/64] 2023-07-06 01:12:18,338 (trainer:732) INFO: 14epoch:train:6701-6800batch: iter_time=1.129e-04, forward_time=0.106, loss_ctc=63.589, loss_att=52.978, acc=0.681, loss=56.162, backward_time=0.750, grad_norm=83.786, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.931e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 01:13:57,605 (trainer:732) INFO: 14epoch:train:6801-6900batch: iter_time=1.194e-04, forward_time=0.106, loss_ctc=73.216, loss_att=56.431, acc=0.697, loss=61.466, backward_time=0.746, grad_norm=85.113, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.927e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:15:36,756 (trainer:732) INFO: 
14epoch:train:6901-7000batch: iter_time=1.192e-04, forward_time=0.106, loss_ctc=66.478, loss_att=48.895, acc=0.692, loss=54.170, backward_time=0.745, grad_norm=88.642, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.923e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:17:15,887 (trainer:732) INFO: 14epoch:train:7001-7100batch: iter_time=1.179e-04, forward_time=0.106, loss_ctc=70.266, loss_att=50.382, acc=0.693, loss=56.347, backward_time=0.746, grad_norm=84.071, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.919e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 01:18:54,896 (trainer:732) INFO: 14epoch:train:7101-7200batch: iter_time=1.187e-04, forward_time=0.105, loss_ctc=73.012, loss_att=61.557, acc=0.673, loss=64.994, backward_time=0.746, grad_norm=89.483, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.915e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:20:34,275 (trainer:732) INFO: 14epoch:train:7201-7300batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=65.363, loss_att=55.629, acc=0.669, loss=58.549, backward_time=0.746, grad_norm=89.409, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.911e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:22:13,430 (trainer:732) INFO: 14epoch:train:7301-7400batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=78.086, loss_att=58.693, acc=0.692, loss=64.511, backward_time=0.744, grad_norm=93.347, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.907e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:23:52,443 (trainer:732) INFO: 14epoch:train:7401-7500batch: iter_time=1.255e-04, forward_time=0.105, loss_ctc=85.073, loss_att=62.105, acc=0.679, loss=68.995, backward_time=0.745, grad_norm=99.476, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.903e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:23:53,863 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 01:24:12,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 01:24:16,474 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:28:56,627 (trainer:732) INFO: 14epoch:train:7501-7600batch: iter_time=1.287, forward_time=0.106, loss_ctc=68.678, loss_att=50.359, acc=0.697, loss=55.855, backward_time=0.755, grad_norm=80.423, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.899e-05, train_time=6.083 +[gpua003:0/64] 2023-07-06 01:30:36,072 (trainer:732) INFO: 14epoch:train:7601-7700batch: iter_time=1.040e-04, forward_time=0.106, loss_ctc=68.907, loss_att=57.070, acc=0.685, loss=60.621, backward_time=0.745, grad_norm=91.649, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.896e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 01:32:15,336 (trainer:732) INFO: 14epoch:train:7701-7800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=70.050, loss_att=51.852, acc=0.696, loss=57.312, backward_time=0.744, grad_norm=83.173, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.892e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:33:54,551 (trainer:732) INFO: 14epoch:train:7801-7900batch: iter_time=9.941e-05, forward_time=0.106, loss_ctc=73.006, loss_att=51.646, acc=0.691, loss=58.054, backward_time=0.745, grad_norm=86.960, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.888e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:35:34,182 (trainer:732) INFO: 14epoch:train:7901-8000batch: iter_time=8.641e-05, forward_time=0.107, loss_ctc=70.408, loss_att=60.834, acc=0.686, loss=63.707, backward_time=0.748, grad_norm=102.961, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.884e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 01:37:13,489 (trainer:732) INFO: 14epoch:train:8001-8100batch: iter_time=1.062e-04, forward_time=0.107, loss_ctc=66.066, loss_att=55.633, acc=0.676, loss=58.763, backward_time=0.747, grad_norm=84.748, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.880e-05, train_time=1.986 +[gpua003:0/64] 2023-07-06 01:38:55,993 (trainer:732) INFO: 14epoch:train:8101-8200batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=77.571, loss_att=61.968, acc=0.687, loss=66.649, backward_time=0.748, grad_norm=91.196, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.876e-05, train_time=2.050 +[gpua003:0/64] 2023-07-06 01:40:35,417 (trainer:732) INFO: 14epoch:train:8201-8300batch: iter_time=9.520e-05, forward_time=0.107, loss_ctc=80.442, loss_att=55.920, acc=0.694, loss=63.276, backward_time=0.747, grad_norm=82.962, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, 
optim0_lr0=9.872e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:41:10,610 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 01:41:29,651 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 01:41:33,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:46:45,702 (trainer:732) INFO: 14epoch:train:8301-8400batch: iter_time=1.290, forward_time=0.120, loss_ctc=72.247, loss_att=53.015, acc=0.698, loss=58.785, backward_time=0.761, grad_norm=82.758, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.869e-05, train_time=7.405 +[gpua003:0/64] 2023-07-06 01:48:26,447 (trainer:732) INFO: 14epoch:train:8401-8500batch: iter_time=1.170e-04, forward_time=0.105, loss_ctc=72.826, loss_att=61.567, acc=0.673, loss=64.945, backward_time=0.746, grad_norm=92.862, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.865e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 01:50:06,144 (trainer:732) INFO: 14epoch:train:8501-8600batch: iter_time=1.142e-04, forward_time=0.105, loss_ctc=71.755, loss_att=54.026, acc=0.689, loss=59.345, backward_time=0.745, grad_norm=84.483, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.861e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 01:51:45,693 (trainer:732) INFO: 14epoch:train:8601-8700batch: iter_time=1.203e-04, forward_time=0.106, loss_ctc=62.700, loss_att=45.802, acc=0.702, loss=50.872, backward_time=0.746, grad_norm=76.913, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.857e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 01:53:38,865 (trainer:732) INFO: 14epoch:train:8701-8800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=74.653, loss_att=55.710, acc=0.688, loss=61.393, backward_time=0.773, grad_norm=129.792, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.853e-05, train_time=2.263 +[gpua003:0/64] 2023-07-06 01:55:18,104 (trainer:732) INFO: 14epoch:train:8801-8900batch: iter_time=1.105e-04, forward_time=0.106, loss_ctc=64.757, loss_att=56.246, acc=0.674, loss=58.799, backward_time=0.744, grad_norm=80.970, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.849e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:56:59,672 (trainer:732) INFO: 14epoch:train:8901-9000batch: iter_time=1.077e-04, forward_time=0.106, loss_ctc=70.334, loss_att=57.944, acc=0.678, loss=61.661, backward_time=0.751, grad_norm=83.367, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.846e-05, train_time=2.031 +[gpua003:0/64] 2023-07-06 01:58:38,903 (trainer:732) INFO: 14epoch:train:9001-9100batch: iter_time=1.082e-04, forward_time=0.106, loss_ctc=80.565, loss_att=57.072, 
acc=0.700, loss=64.119, backward_time=0.745, grad_norm=93.217, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.842e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:59:52,859 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 02:00:11,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 02:00:15,311 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:04:51,398 (trainer:732) INFO: 14epoch:train:9101-9200batch: iter_time=1.321, forward_time=0.157, loss_ctc=81.944, loss_att=58.952, acc=0.689, loss=65.850, backward_time=0.767, grad_norm=105.225, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.838e-05, train_time=7.449 +[gpua003:0/64] 2023-07-06 02:06:31,077 (trainer:732) INFO: 14epoch:train:9201-9300batch: iter_time=1.023e-04, forward_time=0.105, loss_ctc=66.026, loss_att=53.515, acc=0.692, loss=57.268, backward_time=0.747, grad_norm=86.498, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.834e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 02:08:11,806 (trainer:732) INFO: 14epoch:train:9301-9400batch: iter_time=9.862e-05, forward_time=0.106, loss_ctc=73.964, loss_att=57.337, acc=0.700, loss=62.325, backward_time=0.745, grad_norm=85.275, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.830e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 02:09:51,859 (trainer:732) INFO: 14epoch:train:9401-9500batch: iter_time=1.055e-04, forward_time=0.106, loss_ctc=64.980, loss_att=48.354, acc=0.695, loss=53.342, backward_time=0.745, grad_norm=74.030, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.827e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 02:11:31,288 (trainer:732) INFO: 14epoch:train:9501-9600batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=70.565, loss_att=51.030, acc=0.695, loss=56.890, backward_time=0.744, grad_norm=84.506, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.823e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 02:13:10,467 (trainer:732) INFO: 14epoch:train:9601-9700batch: iter_time=1.057e-04, forward_time=0.106, loss_ctc=71.513, loss_att=60.661, acc=0.691, loss=63.916, backward_time=0.745, grad_norm=87.016, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.819e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 02:14:49,595 (trainer:732) INFO: 14epoch:train:9701-9800batch: iter_time=1.059e-04, forward_time=0.106, loss_ctc=65.538, loss_att=54.445, acc=0.682, loss=57.773, backward_time=0.744, grad_norm=94.902, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.815e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 02:16:28,940 
(trainer:732) INFO: 14epoch:train:9801-9900batch: iter_time=1.015e-04, forward_time=0.107, loss_ctc=77.709, loss_att=58.250, acc=0.701, loss=64.087, backward_time=0.745, grad_norm=110.894, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.811e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 02:18:08,171 (trainer:732) INFO: 14epoch:train:9901-10000batch: iter_time=9.460e-05, forward_time=0.106, loss_ctc=82.982, loss_att=61.283, acc=0.688, loss=67.793, backward_time=0.744, grad_norm=103.022, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.808e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 02:32:00,423 (trainer:338) INFO: 14epoch results: [train] iter_time=0.165, forward_time=0.108, loss_ctc=73.204, loss_att=56.354, acc=0.685, loss=61.409, backward_time=0.748, grad_norm=93.679, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=2.598, time=3 hours, 36 minutes and 52.71 seconds, total_count=110000, gpu_max_cached_mem_GB=34.473, [valid] loss_ctc=52.779, cer_ctc=0.299, loss_att=43.314, acc=0.648, cer=0.406, wer=0.989, loss=46.153, time=7 minutes and 15.36 seconds, total_count=11638, gpu_max_cached_mem_GB=37.768, [att_plot] time=6 minutes and 14.08 seconds, total_count=0, gpu_max_cached_mem_GB=37.768 +[gpua003:0/64] 2023-07-06 02:32:18,801 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua003:0/64] 2023-07-06 02:32:18,908 (trainer:272) INFO: 15/100epoch started. Estimated time to finish: 1 week, 6 days and 18 hours +[gpua003:0/64] 2023-07-06 02:32:19,981 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua003:0/64] 2023-07-06 02:32:39,156 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:32:42,632 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:32:42,633 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 02:32:42,673 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:39:26,949 (trainer:732) INFO: 15epoch:train:1-100batch: iter_time=3.199, forward_time=0.158, loss_ctc=82.906, loss_att=63.268, acc=0.672, loss=69.159, backward_time=0.766, grad_norm=96.452, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.118, optim0_lr0=9.804e-05, train_time=8.551 +[gpua003:0/64] 2023-07-06 02:41:12,530 (trainer:732) INFO: 15epoch:train:101-200batch: iter_time=1.083e-04, forward_time=0.110, loss_ctc=93.463, loss_att=58.148, acc=0.684, loss=68.742, backward_time=0.761, grad_norm=99.343, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.800e-05, train_time=2.112 +[gpua003:0/64] 2023-07-06 02:42:58,616 (trainer:732) INFO: 15epoch:train:201-300batch: iter_time=1.075e-04, forward_time=0.109, loss_ctc=72.287, loss_att=52.500, acc=0.677, loss=58.436, backward_time=0.756, grad_norm=100.014, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, 
optim0_lr0=9.796e-05, train_time=2.122 +[gpua003:0/64] 2023-07-06 02:44:49,824 (trainer:732) INFO: 15epoch:train:301-400batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=74.935, loss_att=63.168, acc=0.669, loss=66.698, backward_time=0.769, grad_norm=93.180, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.793e-05, train_time=2.224 +[gpua003:0/64] 2023-07-06 02:46:40,595 (trainer:732) INFO: 15epoch:train:401-500batch: iter_time=1.069e-04, forward_time=0.109, loss_ctc=84.885, loss_att=68.005, acc=0.654, loss=73.069, backward_time=0.761, grad_norm=120.148, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.789e-05, train_time=2.215 +[gpua003:0/64] 2023-07-06 02:48:27,207 (trainer:732) INFO: 15epoch:train:501-600batch: iter_time=1.120e-04, forward_time=0.116, loss_ctc=88.432, loss_att=68.337, acc=0.676, loss=74.366, backward_time=0.760, grad_norm=91.380, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.785e-05, train_time=2.132 +[gpua003:0/64] 2023-07-06 02:50:13,816 (trainer:732) INFO: 15epoch:train:601-700batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=76.229, loss_att=54.425, acc=0.696, loss=60.966, backward_time=0.767, grad_norm=83.791, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.781e-05, train_time=2.132 +[gpua003:0/64] 2023-07-06 02:52:11,799 (trainer:732) INFO: 15epoch:train:701-800batch: iter_time=1.037e-04, forward_time=0.120, loss_ctc=79.546, loss_att=62.351, acc=0.675, loss=67.509, backward_time=0.782, grad_norm=103.551, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.778e-05, train_time=2.359 +[gpua003:0/64] 2023-07-06 02:53:02,111 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 02:53:21,354 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 02:53:24,887 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:58:05,256 (trainer:732) INFO: 15epoch:train:801-900batch: iter_time=1.628, forward_time=0.133, loss_ctc=82.093, loss_att=59.999, acc=0.678, loss=66.627, backward_time=0.769, grad_norm=104.738, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.774e-05, train_time=7.069 +[gpua003:0/64] 2023-07-06 02:59:46,184 (trainer:732) INFO: 15epoch:train:901-1000batch: iter_time=1.123e-04, forward_time=0.108, loss_ctc=77.407, loss_att=56.196, acc=0.685, loss=62.559, backward_time=0.752, grad_norm=95.582, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.770e-05, train_time=2.018 +[gpua003:0/64] 2023-07-06 03:01:26,322 (trainer:732) INFO: 15epoch:train:1001-1100batch: iter_time=9.835e-05, forward_time=0.107, loss_ctc=85.077, loss_att=54.745, acc=0.695, 
loss=63.845, backward_time=0.755, grad_norm=87.655, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.766e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 03:03:06,138 (trainer:732) INFO: 15epoch:train:1101-1200batch: iter_time=8.910e-05, forward_time=0.106, loss_ctc=71.804, loss_att=57.490, acc=0.664, loss=61.784, backward_time=0.752, grad_norm=96.520, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.763e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 03:04:46,029 (trainer:732) INFO: 15epoch:train:1201-1300batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=78.767, loss_att=64.261, acc=0.679, loss=68.613, backward_time=0.752, grad_norm=95.553, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.759e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 03:06:25,781 (trainer:732) INFO: 15epoch:train:1301-1400batch: iter_time=9.587e-05, forward_time=0.106, loss_ctc=89.123, loss_att=69.314, acc=0.678, loss=75.257, backward_time=0.752, grad_norm=99.925, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.755e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 03:08:05,655 (trainer:732) INFO: 15epoch:train:1401-1500batch: iter_time=9.323e-05, forward_time=0.106, loss_ctc=77.240, loss_att=56.417, acc=0.693, loss=62.664, backward_time=0.752, grad_norm=92.784, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.751e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 03:09:45,213 (trainer:732) INFO: 15epoch:train:1501-1600batch: iter_time=9.074e-05, forward_time=0.105, loss_ctc=76.823, loss_att=57.859, acc=0.677, loss=63.548, backward_time=0.751, grad_norm=99.313, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.748e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 03:10:52,700 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpua003:0/64] 2023-07-06 03:11:11,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 03:11:15,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 03:16:19,134 (trainer:732) INFO: 15epoch:train:1601-1700batch: iter_time=1.281, forward_time=0.107, loss_ctc=78.684, loss_att=60.055, acc=0.678, loss=65.643, backward_time=0.768, grad_norm=81.939, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.744e-05, train_time=7.878 +[gpua003:0/64] 2023-07-06 03:18:00,308 (trainer:732) INFO: 15epoch:train:1701-1800batch: iter_time=9.922e-05, forward_time=0.107, loss_ctc=79.008, loss_att=56.298, acc=0.696, loss=63.111, backward_time=0.753, grad_norm=81.521, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.740e-05, train_time=2.023 +[gpua003:0/64] 2023-07-06 03:19:40,539 (trainer:732) INFO: 15epoch:train:1801-1900batch: iter_time=9.836e-05, forward_time=0.108, loss_ctc=90.709, loss_att=57.184, acc=0.688, loss=67.242, backward_time=0.754, grad_norm=97.794, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.737e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 03:21:20,631 (trainer:732) INFO: 15epoch:train:1901-2000batch: iter_time=9.980e-05, forward_time=0.107, loss_ctc=71.072, loss_att=53.719, acc=0.682, loss=58.925, backward_time=0.751, grad_norm=83.439, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.733e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 03:23:00,483 (trainer:732) INFO: 15epoch:train:2001-2100batch: iter_time=9.705e-05, forward_time=0.107, loss_ctc=74.810, loss_att=63.359, acc=0.675, loss=66.794, backward_time=0.751, grad_norm=91.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.729e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 03:24:40,225 (trainer:732) INFO: 15epoch:train:2101-2200batch: iter_time=9.846e-05, forward_time=0.107, loss_ctc=86.087, loss_att=63.950, acc=0.671, loss=70.591, backward_time=0.751, grad_norm=104.916, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.726e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 03:26:19,997 (trainer:732) INFO: 15epoch:train:2201-2300batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=81.168, loss_att=65.843, acc=0.683, loss=70.440, backward_time=0.751, grad_norm=85.546, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.722e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 03:28:19,638 (trainer:732) INFO: 15epoch:train:2301-2400batch: iter_time=9.845e-05, forward_time=0.106, loss_ctc=75.986, loss_att=55.773, acc=0.686, loss=61.837, backward_time=0.780, grad_norm=89.935, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.718e-05, 
train_time=2.393 +[gpua003:0/64] 2023-07-06 03:30:11,150 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 03:30:30,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 03:30:33,650 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 03:33:45,898 (trainer:732) INFO: 15epoch:train:2401-2500batch: iter_time=1.301, forward_time=0.142, loss_ctc=78.818, loss_att=56.765, acc=0.693, loss=63.381, backward_time=0.782, grad_norm=91.105, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.715e-05, train_time=6.525 +[gpua003:0/64] 2023-07-06 03:35:27,576 (trainer:732) INFO: 15epoch:train:2501-2600batch: iter_time=1.159e-04, forward_time=0.110, loss_ctc=80.164, loss_att=60.607, acc=0.691, loss=66.474, backward_time=0.759, grad_norm=88.751, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.711e-05, train_time=2.034 +[gpua003:0/64] 2023-07-06 03:37:07,711 (trainer:732) INFO: 15epoch:train:2601-2700batch: iter_time=1.071e-04, forward_time=0.108, loss_ctc=89.216, loss_att=56.175, acc=0.699, loss=66.087, backward_time=0.752, grad_norm=94.279, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.707e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 03:38:47,535 (trainer:732) INFO: 15epoch:train:2701-2800batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=71.669, loss_att=52.232, acc=0.691, loss=58.063, backward_time=0.752, grad_norm=78.637, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.704e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 03:40:27,453 (trainer:732) INFO: 15epoch:train:2801-2900batch: iter_time=9.136e-05, forward_time=0.108, loss_ctc=72.118, loss_att=61.501, acc=0.688, loss=64.686, backward_time=0.752, grad_norm=86.278, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.700e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 03:42:07,650 (trainer:732) INFO: 15epoch:train:2901-3000batch: iter_time=9.709e-05, forward_time=0.108, loss_ctc=83.002, loss_att=63.713, acc=0.675, loss=69.500, backward_time=0.754, grad_norm=94.808, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.696e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 03:43:47,676 (trainer:732) INFO: 15epoch:train:3001-3100batch: iter_time=9.954e-05, forward_time=0.108, loss_ctc=85.122, loss_att=64.702, acc=0.694, loss=70.828, backward_time=0.754, grad_norm=92.367, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.693e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 03:45:30,195 (trainer:732) INFO: 15epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=74.716, loss_att=52.707, acc=0.702, loss=59.310, 
backward_time=0.753, grad_norm=81.698, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.689e-05, train_time=2.050 +[gpua003:0/64] 2023-07-06 03:47:13,089 (trainer:732) INFO: 15epoch:train:3201-3300batch: iter_time=9.444e-05, forward_time=0.108, loss_ctc=77.867, loss_att=61.783, acc=0.691, loss=66.608, backward_time=0.759, grad_norm=98.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.685e-05, train_time=2.058 +[gpua003:0/64] 2023-07-06 03:47:53,832 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 03:48:12,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 03:48:16,513 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 03:48:16,514 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 03:48:16,520 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 03:53:07,778 (trainer:732) INFO: 15epoch:train:3301-3400batch: iter_time=1.891, forward_time=0.108, loss_ctc=80.670, loss_att=59.551, acc=0.693, loss=65.887, backward_time=0.768, grad_norm=91.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.682e-05, train_time=7.094 +[gpua003:0/64] 2023-07-06 03:54:48,045 (trainer:732) INFO: 15epoch:train:3401-3500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=76.671, loss_att=54.588, acc=0.697, loss=61.213, backward_time=0.753, grad_norm=86.293, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.678e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 03:56:29,514 (trainer:732) INFO: 15epoch:train:3501-3600batch: iter_time=9.121e-05, forward_time=0.108, loss_ctc=83.637, loss_att=55.309, acc=0.702, loss=63.808, backward_time=0.753, grad_norm=89.277, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.674e-05, train_time=2.029 +[gpua003:0/64] 2023-07-06 03:58:09,348 (trainer:732) INFO: 15epoch:train:3601-3700batch: iter_time=9.560e-05, forward_time=0.108, loss_ctc=70.720, loss_att=55.269, acc=0.679, loss=59.904, backward_time=0.751, grad_norm=89.133, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.671e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 03:59:49,035 (trainer:732) INFO: 15epoch:train:3701-3800batch: iter_time=9.637e-05, forward_time=0.107, loss_ctc=77.221, loss_att=62.310, acc=0.690, loss=66.783, backward_time=0.751, grad_norm=96.452, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.667e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:01:31,489 (trainer:732) INFO: 15epoch:train:3801-3900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=86.594, loss_att=65.894, acc=0.691, loss=72.104, backward_time=0.753, grad_norm=92.320, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.664e-05, train_time=2.049 +[gpua003:0/64] 2023-07-06 04:03:11,208 (trainer:732) INFO: 
15epoch:train:3901-4000batch: iter_time=9.501e-05, forward_time=0.107, loss_ctc=78.169, loss_att=56.640, acc=0.693, loss=63.098, backward_time=0.750, grad_norm=89.150, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.660e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:04:51,122 (trainer:732) INFO: 15epoch:train:4001-4100batch: iter_time=9.624e-05, forward_time=0.107, loss_ctc=76.858, loss_att=56.884, acc=0.686, loss=62.876, backward_time=0.751, grad_norm=103.211, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.656e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 04:05:57,372 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 04:06:16,333 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 04:06:19,870 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 04:09:19,124 (trainer:732) INFO: 15epoch:train:4101-4200batch: iter_time=1.298, forward_time=0.107, loss_ctc=76.656, loss_att=57.378, acc=0.699, loss=63.161, backward_time=0.761, grad_norm=84.164, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.653e-05, train_time=5.360 +[gpua003:0/64] 2023-07-06 04:10:59,509 (trainer:732) INFO: 15epoch:train:4201-4300batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=77.716, loss_att=58.424, acc=0.694, loss=64.212, backward_time=0.754, grad_norm=86.815, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.649e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 04:12:39,763 (trainer:732) INFO: 15epoch:train:4301-4400batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=86.970, loss_att=57.258, acc=0.694, loss=66.171, backward_time=0.754, grad_norm=89.527, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.646e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 04:14:19,772 (trainer:732) INFO: 15epoch:train:4401-4500batch: iter_time=8.559e-05, forward_time=0.108, loss_ctc=70.095, loss_att=53.690, acc=0.681, loss=58.611, backward_time=0.754, grad_norm=78.614, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.642e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 04:15:59,517 (trainer:732) INFO: 15epoch:train:4501-4600batch: iter_time=9.013e-05, forward_time=0.107, loss_ctc=74.563, loss_att=61.596, acc=0.680, loss=65.486, backward_time=0.751, grad_norm=89.324, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.638e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 04:17:39,351 (trainer:732) INFO: 15epoch:train:4601-4700batch: iter_time=9.033e-05, forward_time=0.108, loss_ctc=83.925, loss_att=60.152, acc=0.693, loss=67.284, backward_time=0.752, grad_norm=90.456, clip=100.000, loss_scale=5.629e+14, 
optim_step_time=0.112, optim0_lr0=9.635e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 04:19:19,250 (trainer:732) INFO: 15epoch:train:4701-4800batch: iter_time=9.522e-05, forward_time=0.108, loss_ctc=78.761, loss_att=63.200, acc=0.690, loss=67.868, backward_time=0.753, grad_norm=86.126, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.631e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 04:20:59,100 (trainer:732) INFO: 15epoch:train:4801-4900batch: iter_time=9.812e-05, forward_time=0.107, loss_ctc=74.851, loss_att=55.680, acc=0.694, loss=61.431, backward_time=0.752, grad_norm=103.100, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.628e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 04:22:39,137 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 04:22:58,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 04:23:01,987 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 04:26:15,035 (trainer:732) INFO: 15epoch:train:4901-5000batch: iter_time=1.298, forward_time=0.107, loss_ctc=79.639, loss_att=56.728, acc=0.693, loss=63.601, backward_time=0.757, grad_norm=91.688, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=6.318 +[gpua003:0/64] 2023-07-06 04:27:57,014 (trainer:732) INFO: 15epoch:train:5001-5100batch: iter_time=1.188e-04, forward_time=0.110, loss_ctc=79.481, loss_att=59.825, acc=0.685, loss=65.722, backward_time=0.758, grad_norm=89.848, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.621e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 04:29:37,013 (trainer:732) INFO: 15epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=86.315, loss_att=56.327, acc=0.690, loss=65.324, backward_time=0.752, grad_norm=105.057, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.617e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 04:31:16,972 (trainer:732) INFO: 15epoch:train:5201-5300batch: iter_time=1.038e-04, forward_time=0.106, loss_ctc=70.861, loss_att=50.758, acc=0.689, loss=56.789, backward_time=0.752, grad_norm=87.606, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.614e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 04:32:56,687 (trainer:732) INFO: 15epoch:train:5301-5400batch: iter_time=9.220e-05, forward_time=0.106, loss_ctc=71.546, loss_att=60.197, acc=0.685, loss=63.601, backward_time=0.750, grad_norm=91.661, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.610e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:34:36,595 (trainer:732) INFO: 15epoch:train:5401-5500batch: iter_time=9.018e-05, forward_time=0.107, 
loss_ctc=82.998, loss_att=65.769, acc=0.666, loss=70.938, backward_time=0.752, grad_norm=109.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.606e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 04:36:16,255 (trainer:732) INFO: 15epoch:train:5501-5600batch: iter_time=9.943e-05, forward_time=0.107, loss_ctc=82.860, loss_att=64.257, acc=0.691, loss=69.838, backward_time=0.750, grad_norm=91.728, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.603e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 04:37:55,929 (trainer:732) INFO: 15epoch:train:5601-5700batch: iter_time=9.638e-05, forward_time=0.106, loss_ctc=74.731, loss_att=53.015, acc=0.702, loss=59.530, backward_time=0.751, grad_norm=90.537, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.599e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 04:39:35,714 (trainer:732) INFO: 15epoch:train:5701-5800batch: iter_time=9.332e-05, forward_time=0.107, loss_ctc=76.746, loss_att=60.403, acc=0.686, loss=65.306, backward_time=0.751, grad_norm=100.897, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.596e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 04:40:08,943 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 04:40:28,479 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 04:40:32,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 04:44:18,911 (trainer:732) INFO: 15epoch:train:5801-5900batch: iter_time=1.331, forward_time=0.108, loss_ctc=75.757, loss_att=54.746, acc=0.692, loss=61.049, backward_time=0.767, grad_norm=90.029, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.592e-05, train_time=5.664 +[gpua003:0/64] 2023-07-06 04:45:59,455 (trainer:732) INFO: 15epoch:train:5901-6000batch: iter_time=1.005e-04, forward_time=0.107, loss_ctc=80.541, loss_att=59.034, acc=0.691, loss=65.486, backward_time=0.753, grad_norm=101.034, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.589e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 04:47:39,684 (trainer:732) INFO: 15epoch:train:6001-6100batch: iter_time=9.840e-05, forward_time=0.107, loss_ctc=77.216, loss_att=50.560, acc=0.689, loss=58.557, backward_time=0.750, grad_norm=91.962, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.585e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 04:49:19,460 (trainer:732) INFO: 15epoch:train:6101-6200batch: iter_time=9.863e-05, forward_time=0.108, loss_ctc=70.142, loss_att=56.782, acc=0.679, loss=60.790, backward_time=0.751, grad_norm=87.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.582e-05, train_time=1.995 
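# Annotation: the loss_scale column just doubled -- entries up to 04:46 show
# 5.629e+14 and the 04:47:39 entry onwards shows 1.126e+15; later stretches of
# this log reach 2.252e+15, 4.504e+15 and 9.007e+15. Every value is an exact
# power of two (2**49 .. 2**53), which is consistent with dynamic loss scaling
# in mixed-precision training, e.g. torch.cuda.amp.GradScaler (defaults
# growth_factor=2.0, backoff_factor=0.5, growth_interval=2000). The scaler
# actually used is not echoed in this log, so the sketch below illustrates
# that update rule under those assumptions; it is not code from this run.
import math

for s in (5.629e14, 1.126e15, 2.252e15, 4.504e15, 9.007e15):
    print(f"{s:.3e} ~= 2**{round(math.log2(s))}")   # 49, 50, 51, 52, 53

def update_scale(scale, found_inf, streak, growth_interval=2000):
    """GradScaler-style rule: halve on overflow, double after a clean streak."""
    if found_inf:                  # inf/nan gradients: back off and reset
        return scale * 0.5, 0
    streak += 1
    if streak == growth_interval:  # long overflow-free streak: grow the scale
        return scale * 2.0, 0      # -> the doublings seen in this log
    return scale, streak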
+[gpua003:0/64] 2023-07-06 04:50:59,293 (trainer:732) INFO: 15epoch:train:6201-6300batch: iter_time=9.683e-05, forward_time=0.107, loss_ctc=84.691, loss_att=67.527, acc=0.672, loss=72.676, backward_time=0.751, grad_norm=96.501, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.578e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 04:52:39,001 (trainer:732) INFO: 15epoch:train:6301-6400batch: iter_time=9.518e-05, forward_time=0.108, loss_ctc=78.908, loss_att=59.469, acc=0.696, loss=65.301, backward_time=0.751, grad_norm=90.460, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.575e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 04:54:18,553 (trainer:732) INFO: 15epoch:train:6401-6500batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=79.232, loss_att=59.510, acc=0.686, loss=65.427, backward_time=0.749, grad_norm=104.175, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.571e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 04:55:58,195 (trainer:732) INFO: 15epoch:train:6501-6600batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=74.355, loss_att=57.326, acc=0.685, loss=62.435, backward_time=0.750, grad_norm=89.442, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.568e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 04:57:05,281 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 04:57:24,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 04:57:27,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:03:01,395 (trainer:732) INFO: 15epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.108, loss_ctc=78.273, loss_att=57.077, acc=0.685, loss=63.436, backward_time=0.759, grad_norm=85.784, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.564e-05, train_time=8.464 +[gpua003:0/64] 2023-07-06 05:04:42,396 (trainer:732) INFO: 15epoch:train:6701-6800batch: iter_time=1.146e-04, forward_time=0.110, loss_ctc=75.552, loss_att=54.344, acc=0.708, loss=60.707, backward_time=0.754, grad_norm=87.661, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.561e-05, train_time=2.020 +[gpua003:0/64] 2023-07-06 05:06:22,783 (trainer:732) INFO: 15epoch:train:6801-6900batch: iter_time=1.151e-04, forward_time=0.109, loss_ctc=87.760, loss_att=56.616, acc=0.702, loss=65.959, backward_time=0.754, grad_norm=109.179, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.557e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 05:08:02,835 (trainer:732) INFO: 15epoch:train:6901-7000batch: iter_time=1.119e-04, forward_time=0.110, loss_ctc=68.674, loss_att=52.588, acc=0.694, loss=57.414, 
backward_time=0.753, grad_norm=86.799, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.554e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 05:09:42,653 (trainer:732) INFO: 15epoch:train:7001-7100batch: iter_time=1.118e-04, forward_time=0.109, loss_ctc=73.621, loss_att=61.316, acc=0.689, loss=65.008, backward_time=0.752, grad_norm=105.693, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.550e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 05:11:22,495 (trainer:732) INFO: 15epoch:train:7101-7200batch: iter_time=1.103e-04, forward_time=0.109, loss_ctc=83.133, loss_att=61.505, acc=0.686, loss=67.994, backward_time=0.752, grad_norm=104.402, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.547e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 05:13:02,248 (trainer:732) INFO: 15epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.109, loss_ctc=81.010, loss_att=64.819, acc=0.692, loss=69.677, backward_time=0.751, grad_norm=110.286, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.543e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 05:14:42,319 (trainer:732) INFO: 15epoch:train:7301-7400batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=72.689, loss_att=54.067, acc=0.697, loss=59.654, backward_time=0.752, grad_norm=85.938, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.540e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 05:16:26,124 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua003:0/64] 2023-07-06 05:16:45,225 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 05:16:48,776 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 05:16:48,777 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 05:16:48,783 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:21:21,795 (trainer:732) INFO: 15epoch:train:7401-7500batch: iter_time=1.282, forward_time=0.110, loss_ctc=75.686, loss_att=56.849, acc=0.702, loss=62.500, backward_time=0.778, grad_norm=88.301, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.536e-05, train_time=7.989 +[gpua003:0/64] 2023-07-06 05:23:04,740 (trainer:732) INFO: 15epoch:train:7501-7600batch: iter_time=1.225e-04, forward_time=0.109, loss_ctc=74.838, loss_att=55.781, acc=0.699, loss=61.498, backward_time=0.759, grad_norm=93.290, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.533e-05, train_time=2.059 +[gpua003:0/64] 2023-07-06 05:24:45,328 (trainer:732) INFO: 15epoch:train:7601-7700batch: iter_time=1.022e-04, forward_time=0.110, loss_ctc=83.499, loss_att=54.322, acc=0.699, loss=63.075, backward_time=0.754, grad_norm=90.328, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.529e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 05:26:25,161 (trainer:732) INFO: 
15epoch:train:7701-7800batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=71.384, loss_att=51.519, acc=0.703, loss=57.479, backward_time=0.751, grad_norm=86.506, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.526e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 05:28:04,981 (trainer:732) INFO: 15epoch:train:7801-7900batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=73.904, loss_att=64.610, acc=0.679, loss=67.398, backward_time=0.751, grad_norm=102.632, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.522e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 05:29:44,880 (trainer:732) INFO: 15epoch:train:7901-8000batch: iter_time=1.167e-04, forward_time=0.109, loss_ctc=83.085, loss_att=62.060, acc=0.679, loss=68.368, backward_time=0.753, grad_norm=103.977, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.519e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 05:31:24,806 (trainer:732) INFO: 15epoch:train:8001-8100batch: iter_time=1.085e-04, forward_time=0.109, loss_ctc=80.413, loss_att=61.210, acc=0.698, loss=66.971, backward_time=0.752, grad_norm=96.822, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.516e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 05:33:06,003 (trainer:732) INFO: 15epoch:train:8101-8200batch: iter_time=9.574e-05, forward_time=0.109, loss_ctc=75.570, loss_att=55.287, acc=0.694, loss=61.372, backward_time=0.753, grad_norm=88.815, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.512e-05, train_time=2.024 +[gpua003:0/64] 2023-07-06 05:34:48,433 (trainer:732) INFO: 15epoch:train:8201-8300batch: iter_time=1.065e-04, forward_time=0.110, loss_ctc=73.115, loss_att=57.381, acc=0.707, loss=62.101, backward_time=0.755, grad_norm=84.564, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.509e-05, train_time=2.048 +[gpua003:0/64] 2023-07-06 05:35:22,627 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
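# Annotation: optim0_lr0 decays smoothly through this stretch (9.689e-05 at
# the top of the section down to 9.509e-05 here), consistent with the
# inverse-sqrt phase of a Noam-style warmup schedule; the experiment name
# advertises lr 2.5e-4 with 10k warmup steps. The sketch below assumes
# espnet2's WarmupLR rule -- the scheduler is not echoed in this log, so treat
# the exact formula (and the step estimate) as an assumption.
def warmup_lr(step, base_lr=2.5e-4, warmup_steps=10_000):
    # ramps up to base_lr at step == warmup_steps, then decays ~ step**-0.5
    return base_lr * warmup_steps**0.5 * min(step**-0.5,
                                             step * warmup_steps**-1.5)

# inverting the decay branch: lr ~= 9.5e-5 corresponds to roughly
# (2.5e-4 / 9.5e-5)**2 * 10_000 ~= 6.9e4 optimizer steps
print(warmup_lr(69_000))   # ~9.52e-05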
+[gpua003:0/64] 2023-07-06 05:35:42,157 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 05:35:46,001 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:40:45,752 (trainer:732) INFO: 15epoch:train:8301-8400batch: iter_time=1.326, forward_time=0.109, loss_ctc=77.487, loss_att=56.217, acc=0.681, loss=62.598, backward_time=0.787, grad_norm=82.284, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.505e-05, train_time=7.146 +[gpua003:0/64] 2023-07-06 05:42:30,027 (trainer:732) INFO: 15epoch:train:8401-8500batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=81.840, loss_att=60.268, acc=0.694, loss=66.740, backward_time=0.763, grad_norm=98.195, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.502e-05, train_time=2.085 +[gpua003:0/64] 2023-07-06 05:44:15,543 (trainer:732) INFO: 15epoch:train:8501-8600batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=78.760, loss_att=51.979, acc=0.691, loss=60.013, backward_time=0.768, grad_norm=92.124, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.498e-05, train_time=2.110 +[gpua003:0/64] 2023-07-06 05:46:05,064 (trainer:732) INFO: 15epoch:train:8601-8700batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=70.996, loss_att=57.564, acc=0.676, loss=61.593, backward_time=0.777, grad_norm=91.103, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.495e-05, train_time=2.190 +[gpua003:0/64] 2023-07-06 05:47:51,510 (trainer:732) INFO: 15epoch:train:8701-8800batch: iter_time=1.110e-04, forward_time=0.108, loss_ctc=80.422, loss_att=66.412, acc=0.672, loss=70.615, backward_time=0.757, grad_norm=94.400, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.491e-05, train_time=2.129 +[gpua003:0/64] 2023-07-06 05:49:31,215 (trainer:732) INFO: 15epoch:train:8801-8900batch: iter_time=1.153e-04, forward_time=0.108, loss_ctc=78.907, loss_att=60.674, acc=0.693, loss=66.144, backward_time=0.750, grad_norm=89.498, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.488e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 05:51:11,993 (trainer:732) INFO: 15epoch:train:8901-9000batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=76.299, loss_att=56.694, acc=0.694, loss=62.575, backward_time=0.751, grad_norm=82.090, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.485e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 05:52:52,430 (trainer:732) INFO: 15epoch:train:9001-9100batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=74.360, loss_att=57.538, acc=0.689, loss=62.585, backward_time=0.750, grad_norm=87.996, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.481e-05, 
train_time=2.009 +[gpua003:0/64] 2023-07-06 05:54:01,666 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 05:54:20,653 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 05:54:24,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 05:58:56,052 (trainer:732) INFO: 15epoch:train:9101-9200batch: iter_time=1.330, forward_time=0.135, loss_ctc=76.652, loss_att=55.319, acc=0.689, loss=61.719, backward_time=0.760, grad_norm=94.798, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.478e-05, train_time=7.272 +[gpua003:0/64] 2023-07-06 06:00:39,277 (trainer:732) INFO: 15epoch:train:9201-9300batch: iter_time=9.552e-05, forward_time=0.111, loss_ctc=77.373, loss_att=56.510, acc=0.706, loss=62.769, backward_time=0.757, grad_norm=98.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.474e-05, train_time=2.064 +[gpua003:0/64] 2023-07-06 06:02:26,540 (trainer:732) INFO: 15epoch:train:9301-9400batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=86.609, loss_att=55.256, acc=0.704, loss=64.662, backward_time=0.763, grad_norm=94.931, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.471e-05, train_time=2.145 +[gpua003:0/64] 2023-07-06 06:04:09,380 (trainer:732) INFO: 15epoch:train:9401-9500batch: iter_time=1.045e-04, forward_time=0.109, loss_ctc=69.832, loss_att=52.386, acc=0.693, loss=57.620, backward_time=0.753, grad_norm=89.310, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.468e-05, train_time=2.057 +[gpua003:0/64] 2023-07-06 06:05:51,252 (trainer:732) INFO: 15epoch:train:9501-9600batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.533, loss_att=62.841, acc=0.688, loss=66.049, backward_time=0.751, grad_norm=93.939, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.464e-05, train_time=2.037 +[gpua003:0/64] 2023-07-06 06:07:41,819 (trainer:732) INFO: 15epoch:train:9601-9700batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=83.150, loss_att=60.639, acc=0.687, loss=67.393, backward_time=0.774, grad_norm=94.514, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.461e-05, train_time=2.211 +[gpua003:0/64] 2023-07-06 06:09:23,340 (trainer:732) INFO: 15epoch:train:9701-9800batch: iter_time=1.286e-04, forward_time=0.108, loss_ctc=81.189, loss_att=65.286, acc=0.694, loss=70.057, backward_time=0.752, grad_norm=92.569, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.457e-05, train_time=2.030 +[gpua003:0/64] 2023-07-06 06:11:03,236 (trainer:732) INFO: 15epoch:train:9801-9900batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=72.362, loss_att=53.624, acc=0.694, loss=59.246, 
backward_time=0.751, grad_norm=87.692, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.111, optim0_lr0=9.454e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 06:12:42,895 (trainer:732) INFO: 15epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.107, loss_ctc=75.625, loss_att=55.477, acc=0.704, loss=61.522, backward_time=0.750, grad_norm=86.254, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.451e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 06:24:28,681 (trainer:338) INFO: 15epoch results: [train] iter_time=0.185, forward_time=0.110, loss_ctc=78.489, loss_att=58.649, acc=0.688, loss=64.601, backward_time=0.756, grad_norm=93.007, clip=100.000, loss_scale=7.318e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=2.644, time=3 hours, 40 minutes and 32.98 seconds, total_count=120000, gpu_max_cached_mem_GB=37.770, [valid] loss_ctc=52.634, cer_ctc=0.298, loss_att=43.555, acc=0.657, cer=0.377, wer=0.991, loss=46.279, time=5 minutes and 28.62 seconds, total_count=12650, gpu_max_cached_mem_GB=37.770, [att_plot] time=6 minutes and 8.01 seconds, total_count=0, gpu_max_cached_mem_GB=37.770 +[gpua003:0/64] 2023-07-06 06:24:45,984 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua003:0/64] 2023-07-06 06:24:45,990 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/9epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/10epoch.pth +[gpua003:0/64] 2023-07-06 06:24:46,016 (trainer:272) INFO: 16/100epoch started. Estimated time to finish: 1 week, 6 days and 16 hours +[gpua003:0/64] 2023-07-06 06:24:46,836 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
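# Annotation: end-of-epoch housekeeping. The [train] block averages the
# per-100-batch figures over the whole epoch, [valid] adds CER/WER on
# dump/raw/dev, and "The best model has been updated: valid.acc, ..." triggers
# pruning of checkpoints no longer among the best monitored epochs -- hence
# 9epoch.pth and 10epoch.pth being deleted above. The keep_nbest_models
# setting is not shown in this log, and every accuracy below except epoch
# 15's logged 0.657 is made up, so this is a sketch of such retention, not
# ESPnet's actual implementation.
def prune_checkpoints(valid_acc, keep_nbest=5):
    """valid_acc: {epoch: acc}; returns epochs whose .pth can be removed."""
    best = sorted(valid_acc, key=valid_acc.get, reverse=True)[:keep_nbest]
    return sorted(e for e in valid_acc if e not in best)

hypothetical = {9: 0.641, 10: 0.644, 11: 0.647, 12: 0.650,
                13: 0.652, 14: 0.655, 15: 0.657}  # only 0.657 is from the log
print(prune_checkpoints(hypothetical))            # -> [9, 10], as logged above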
+[gpua003:0/64] 2023-07-06 06:25:05,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 06:25:10,617 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 06:31:07,295 (trainer:732) INFO: 16epoch:train:1-100batch: iter_time=2.752, forward_time=0.134, loss_ctc=75.117, loss_att=48.716, acc=0.689, loss=56.636, backward_time=0.771, grad_norm=101.201, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.116, optim0_lr0=9.447e-05, train_time=7.615 +[gpua003:0/64] 2023-07-06 06:32:48,088 (trainer:732) INFO: 16epoch:train:101-200batch: iter_time=1.138e-04, forward_time=0.108, loss_ctc=74.028, loss_att=52.520, acc=0.697, loss=58.972, backward_time=0.754, grad_norm=94.459, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.444e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 06:34:29,724 (trainer:732) INFO: 16epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.109, loss_ctc=64.990, loss_att=51.033, acc=0.694, loss=55.220, backward_time=0.752, grad_norm=82.414, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.441e-05, train_time=2.033 +[gpua003:0/64] 2023-07-06 06:36:17,878 (trainer:732) INFO: 16epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.108, loss_ctc=76.120, loss_att=59.374, acc=0.683, loss=64.398, backward_time=0.760, grad_norm=93.942, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.437e-05, train_time=2.163 +[gpua003:0/64] 2023-07-06 06:38:00,826 (trainer:732) INFO: 16epoch:train:401-500batch: iter_time=1.101e-04, forward_time=0.107, loss_ctc=67.187, loss_att=49.787, acc=0.695, loss=55.007, backward_time=0.754, grad_norm=82.026, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.434e-05, train_time=2.059 +[gpua003:0/64] 2023-07-06 06:39:53,617 (trainer:732) INFO: 16epoch:train:501-600batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=81.351, loss_att=59.096, acc=0.689, loss=65.772, backward_time=0.772, grad_norm=90.956, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.431e-05, train_time=2.256 +[gpua003:0/64] 2023-07-06 06:41:43,234 (trainer:732) INFO: 16epoch:train:601-700batch: iter_time=9.752e-05, forward_time=0.108, loss_ctc=87.613, loss_att=64.737, acc=0.688, loss=71.600, backward_time=0.776, grad_norm=130.042, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.427e-05, train_time=2.192 +[gpua003:0/64] 2023-07-06 06:43:33,517 (trainer:732) INFO: 16epoch:train:701-800batch: iter_time=1.003e-04, forward_time=0.109, loss_ctc=72.526, loss_att=59.461, acc=0.682, loss=63.381, backward_time=0.768, grad_norm=92.190, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.424e-05, train_time=2.205 
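# Annotation: each trainer:732 line reports loss_ctc, loss_att and a combined
# loss, and the combined value reproduces 0.3*loss_ctc + 0.7*loss_att to the
# third decimal throughout this log, i.e. a hybrid CTC/attention objective
# with ctc_weight = 0.3. The weight is inferred from the logged numbers, not
# read from the (unprinted) training yaml, so treat 0.3 as an
# observation-backed assumption rather than the config's stated value.
CTC_WEIGHT = 0.3

def combined(loss_ctc, loss_att, w=CTC_WEIGHT):
    return w * loss_ctc + (1.0 - w) * loss_att

# (loss_ctc, loss_att, logged loss) taken verbatim from the
# 16epoch:train:1-300batch entries just above
for ctc, att, logged in [(75.117, 48.716, 56.636),
                         (74.028, 52.520, 58.972),
                         (64.990, 51.033, 55.220)]:
    assert abs(combined(ctc, att) - logged) < 5e-3, (ctc, att, logged)
print("loss == 0.3*loss_ctc + 0.7*loss_att on all checked lines")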
+[gpua003:0/64] 2023-07-06 06:44:14,263 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 06:44:33,081 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 06:44:36,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 06:48:21,733 (trainer:732) INFO: 16epoch:train:801-900batch: iter_time=1.321, forward_time=0.107, loss_ctc=80.667, loss_att=58.981, acc=0.688, loss=65.487, backward_time=0.768, grad_norm=96.967, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.420e-05, train_time=5.764 +[gpua003:0/64] 2023-07-06 06:50:06,977 (trainer:732) INFO: 16epoch:train:901-1000batch: iter_time=9.995e-05, forward_time=0.107, loss_ctc=71.004, loss_att=50.281, acc=0.699, loss=56.498, backward_time=0.758, grad_norm=96.342, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.417e-05, train_time=2.105 +[gpua003:0/64] 2023-07-06 06:51:47,247 (trainer:732) INFO: 16epoch:train:1001-1100batch: iter_time=1.007e-04, forward_time=0.107, loss_ctc=62.698, loss_att=47.891, acc=0.697, loss=52.333, backward_time=0.752, grad_norm=74.518, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.414e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 06:53:27,011 (trainer:732) INFO: 16epoch:train:1101-1200batch: iter_time=1.048e-04, forward_time=0.108, loss_ctc=75.410, loss_att=57.557, acc=0.687, loss=62.913, backward_time=0.751, grad_norm=83.906, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.410e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 06:55:06,788 (trainer:732) INFO: 16epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.107, loss_ctc=64.005, loss_att=47.055, acc=0.700, loss=52.140, backward_time=0.751, grad_norm=79.784, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.407e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 06:56:46,657 (trainer:732) INFO: 16epoch:train:1301-1400batch: iter_time=9.661e-05, forward_time=0.107, loss_ctc=81.759, loss_att=59.701, acc=0.678, loss=66.319, backward_time=0.751, grad_norm=92.222, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.404e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 06:58:26,430 (trainer:732) INFO: 16epoch:train:1401-1500batch: iter_time=9.571e-05, forward_time=0.107, loss_ctc=82.212, loss_att=60.520, acc=0.699, loss=67.027, backward_time=0.752, grad_norm=85.514, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.400e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:00:06,377 (trainer:732) INFO: 16epoch:train:1501-1600batch: iter_time=9.767e-05, forward_time=0.108, loss_ctc=70.485, loss_att=56.946, acc=0.687, loss=61.008, 
backward_time=0.752, grad_norm=93.511, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.397e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 07:01:14,601 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 07:01:33,950 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 07:01:37,882 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:05:32,440 (trainer:732) INFO: 16epoch:train:1601-1700batch: iter_time=1.290, forward_time=0.108, loss_ctc=76.855, loss_att=60.487, acc=0.696, loss=65.397, backward_time=0.761, grad_norm=104.000, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.394e-05, train_time=6.521 +[gpua003:0/64] 2023-07-06 07:07:12,660 (trainer:732) INFO: 16epoch:train:1701-1800batch: iter_time=9.317e-05, forward_time=0.107, loss_ctc=68.609, loss_att=46.164, acc=0.700, loss=52.897, backward_time=0.753, grad_norm=92.800, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.391e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 07:08:52,753 (trainer:732) INFO: 16epoch:train:1801-1900batch: iter_time=8.951e-05, forward_time=0.107, loss_ctc=67.542, loss_att=52.449, acc=0.694, loss=56.977, backward_time=0.752, grad_norm=85.811, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.387e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 07:10:32,527 (trainer:732) INFO: 16epoch:train:1901-2000batch: iter_time=8.930e-05, forward_time=0.107, loss_ctc=69.790, loss_att=55.626, acc=0.680, loss=59.875, backward_time=0.751, grad_norm=78.275, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.384e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:12:12,369 (trainer:732) INFO: 16epoch:train:2001-2100batch: iter_time=1.018e-04, forward_time=0.108, loss_ctc=69.942, loss_att=52.418, acc=0.688, loss=57.675, backward_time=0.752, grad_norm=83.432, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.381e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 07:13:52,056 (trainer:732) INFO: 16epoch:train:2101-2200batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=66.694, loss_att=50.173, acc=0.698, loss=55.129, backward_time=0.752, grad_norm=74.273, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.377e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:15:31,827 (trainer:732) INFO: 16epoch:train:2201-2300batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=86.699, loss_att=65.479, acc=0.678, loss=71.845, backward_time=0.753, grad_norm=112.678, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.374e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:17:11,502 (trainer:732) INFO: 
16epoch:train:2301-2400batch: iter_time=1.009e-04, forward_time=0.109, loss_ctc=78.962, loss_att=61.486, acc=0.676, loss=66.729, backward_time=0.752, grad_norm=92.485, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.371e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:18:51,509 (trainer:732) INFO: 16epoch:train:2401-2500batch: iter_time=1.015e-04, forward_time=0.110, loss_ctc=75.890, loss_att=62.591, acc=0.690, loss=66.581, backward_time=0.754, grad_norm=93.157, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.367e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 07:18:52,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 07:19:12,128 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 07:19:15,919 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:24:48,132 (trainer:732) INFO: 16epoch:train:2501-2600batch: iter_time=1.279, forward_time=0.108, loss_ctc=71.726, loss_att=46.606, acc=0.697, loss=54.142, backward_time=0.767, grad_norm=105.412, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.364e-05, train_time=7.132 +[gpua003:0/64] 2023-07-06 07:26:27,825 (trainer:732) INFO: 16epoch:train:2601-2700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=66.631, loss_att=51.594, acc=0.698, loss=56.105, backward_time=0.752, grad_norm=90.533, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.361e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:28:07,530 (trainer:732) INFO: 16epoch:train:2701-2800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=70.966, loss_att=55.570, acc=0.680, loss=60.189, backward_time=0.752, grad_norm=86.466, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.358e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:29:47,312 (trainer:732) INFO: 16epoch:train:2801-2900batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=71.097, loss_att=53.450, acc=0.675, loss=58.744, backward_time=0.753, grad_norm=93.208, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.354e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:31:27,133 (trainer:732) INFO: 16epoch:train:2901-3000batch: iter_time=8.955e-05, forward_time=0.108, loss_ctc=63.821, loss_att=46.903, acc=0.701, loss=51.979, backward_time=0.752, grad_norm=74.418, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.351e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 07:33:06,795 (trainer:732) INFO: 16epoch:train:3001-3100batch: iter_time=9.687e-05, forward_time=0.108, loss_ctc=82.866, loss_att=60.932, acc=0.685, loss=67.512, backward_time=0.752, grad_norm=97.470, clip=100.000, loss_scale=2.252e+15, 
optim_step_time=0.113, optim0_lr0=9.348e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:34:46,471 (trainer:732) INFO: 16epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=82.047, loss_att=62.386, acc=0.681, loss=68.285, backward_time=0.752, grad_norm=94.406, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.344e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:36:26,118 (trainer:732) INFO: 16epoch:train:3201-3300batch: iter_time=9.042e-05, forward_time=0.108, loss_ctc=77.669, loss_att=65.484, acc=0.671, loss=69.140, backward_time=0.752, grad_norm=91.421, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.341e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 07:37:01,224 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 07:37:20,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 07:37:24,147 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:41:53,320 (trainer:732) INFO: 16epoch:train:3301-3400batch: iter_time=2.180, forward_time=0.108, loss_ctc=78.290, loss_att=55.680, acc=0.688, loss=62.463, backward_time=0.766, grad_norm=91.637, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.338e-05, train_time=6.544 +[gpua003:0/64] 2023-07-06 07:43:33,605 (trainer:732) INFO: 16epoch:train:3401-3500batch: iter_time=1.037e-04, forward_time=0.107, loss_ctc=68.344, loss_att=49.906, acc=0.712, loss=55.438, backward_time=0.754, grad_norm=81.948, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.335e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 07:45:13,347 (trainer:732) INFO: 16epoch:train:3501-3600batch: iter_time=9.421e-05, forward_time=0.108, loss_ctc=65.417, loss_att=50.435, acc=0.688, loss=54.930, backward_time=0.752, grad_norm=90.718, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.331e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 07:46:53,165 (trainer:732) INFO: 16epoch:train:3601-3700batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.603, loss_att=51.998, acc=0.686, loss=57.579, backward_time=0.751, grad_norm=85.036, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.328e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 07:48:33,178 (trainer:732) INFO: 16epoch:train:3701-3800batch: iter_time=1.098e-04, forward_time=0.109, loss_ctc=65.655, loss_att=51.080, acc=0.697, loss=55.453, backward_time=0.753, grad_norm=88.931, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.325e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 07:50:12,906 (trainer:732) INFO: 16epoch:train:3801-3900batch: iter_time=9.838e-05, forward_time=0.108, loss_ctc=74.297, 
loss_att=55.407, acc=0.684, loss=61.074, backward_time=0.751, grad_norm=82.975, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.322e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 07:51:56,731 (trainer:732) INFO: 16epoch:train:3901-4000batch: iter_time=9.921e-05, forward_time=0.109, loss_ctc=84.998, loss_att=61.759, acc=0.691, loss=68.731, backward_time=0.760, grad_norm=94.665, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.318e-05, train_time=2.076 +[gpua003:0/64] 2023-07-06 07:53:42,883 (trainer:732) INFO: 16epoch:train:4001-4100batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=75.067, loss_att=62.875, acc=0.670, loss=66.533, backward_time=0.758, grad_norm=91.761, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.315e-05, train_time=2.123 +[gpua003:0/64] 2023-07-06 07:54:50,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 07:55:10,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 07:55:13,728 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 07:58:55,908 (trainer:732) INFO: 16epoch:train:4101-4200batch: iter_time=2.076, forward_time=0.141, loss_ctc=74.642, loss_att=55.845, acc=0.693, loss=61.484, backward_time=0.761, grad_norm=97.003, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.114, optim0_lr0=9.312e-05, train_time=6.260 +[gpua003:0/64] 2023-07-06 08:00:37,112 (trainer:732) INFO: 16epoch:train:4201-4300batch: iter_time=1.039e-04, forward_time=0.107, loss_ctc=67.813, loss_att=44.642, acc=0.709, loss=51.593, backward_time=0.754, grad_norm=88.191, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.111, optim0_lr0=9.309e-05, train_time=2.024 +[gpua003:0/64] 2023-07-06 08:02:16,904 (trainer:732) INFO: 16epoch:train:4301-4400batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=68.215, loss_att=52.130, acc=0.696, loss=56.955, backward_time=0.752, grad_norm=94.395, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.306e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:03:56,527 (trainer:732) INFO: 16epoch:train:4401-4500batch: iter_time=9.344e-05, forward_time=0.106, loss_ctc=69.906, loss_att=54.891, acc=0.683, loss=59.395, backward_time=0.751, grad_norm=83.039, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.302e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 08:05:37,831 (trainer:732) INFO: 16epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=68.748, loss_att=51.394, acc=0.689, loss=56.600, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.299e-05, train_time=2.026 +[gpua003:0/64] 2023-07-06 
08:07:17,508 (trainer:732) INFO: 16epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.107, loss_ctc=65.443, loss_att=48.597, acc=0.702, loss=53.651, backward_time=0.752, grad_norm=111.503, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.296e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:08:57,106 (trainer:732) INFO: 16epoch:train:4701-4800batch: iter_time=1.018e-04, forward_time=0.107, loss_ctc=84.020, loss_att=64.596, acc=0.679, loss=70.423, backward_time=0.751, grad_norm=127.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.293e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 08:10:36,976 (trainer:732) INFO: 16epoch:train:4801-4900batch: iter_time=1.060e-04, forward_time=0.108, loss_ctc=76.921, loss_att=58.835, acc=0.684, loss=64.261, backward_time=0.752, grad_norm=114.068, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.289e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:12:16,723 (trainer:732) INFO: 16epoch:train:4901-5000batch: iter_time=1.049e-04, forward_time=0.109, loss_ctc=74.951, loss_att=61.324, acc=0.694, loss=65.412, backward_time=0.751, grad_norm=92.245, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.286e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 08:12:18,610 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 08:12:37,768 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 08:12:41,335 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:18:49,163 (trainer:732) INFO: 16epoch:train:5001-5100batch: iter_time=1.303, forward_time=0.116, loss_ctc=72.397, loss_att=47.284, acc=0.706, loss=54.818, backward_time=0.768, grad_norm=87.352, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.283e-05, train_time=7.848 +[gpua003:0/64] 2023-07-06 08:20:28,927 (trainer:732) INFO: 16epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=69.999, loss_att=51.235, acc=0.705, loss=56.864, backward_time=0.750, grad_norm=90.609, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.280e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:22:09,028 (trainer:732) INFO: 16epoch:train:5201-5300batch: iter_time=8.618e-05, forward_time=0.109, loss_ctc=62.913, loss_att=49.293, acc=0.704, loss=53.379, backward_time=0.753, grad_norm=77.097, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.277e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 08:23:48,933 (trainer:732) INFO: 16epoch:train:5301-5400batch: iter_time=8.628e-05, forward_time=0.109, loss_ctc=72.795, loss_att=55.439, acc=0.694, loss=60.646, backward_time=0.752, grad_norm=88.288, 
clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.273e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 08:25:28,752 (trainer:732) INFO: 16epoch:train:5401-5500batch: iter_time=9.338e-05, forward_time=0.109, loss_ctc=64.718, loss_att=47.439, acc=0.702, loss=52.623, backward_time=0.752, grad_norm=82.629, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.270e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:27:08,812 (trainer:732) INFO: 16epoch:train:5501-5600batch: iter_time=8.449e-05, forward_time=0.108, loss_ctc=78.843, loss_att=57.892, acc=0.696, loss=64.177, backward_time=0.754, grad_norm=92.290, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.267e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 08:28:55,560 (trainer:732) INFO: 16epoch:train:5601-5700batch: iter_time=8.973e-05, forward_time=0.107, loss_ctc=83.966, loss_att=62.853, acc=0.700, loss=69.187, backward_time=0.778, grad_norm=96.335, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.264e-05, train_time=2.135 +[gpua003:0/64] 2023-07-06 08:30:43,294 (trainer:732) INFO: 16epoch:train:5701-5800batch: iter_time=8.613e-05, forward_time=0.108, loss_ctc=69.962, loss_att=57.568, acc=0.690, loss=61.286, backward_time=0.762, grad_norm=89.847, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.261e-05, train_time=2.154 +[gpua003:0/64] 2023-07-06 08:31:17,439 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 08:31:36,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:31:40,135 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:31:40,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 08:31:40,142 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:35:45,247 (trainer:732) INFO: 16epoch:train:5801-5900batch: iter_time=1.276, forward_time=0.107, loss_ctc=80.360, loss_att=57.972, acc=0.693, loss=64.689, backward_time=0.765, grad_norm=106.653, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.258e-05, train_time=6.039 +[gpua003:0/64] 2023-07-06 08:37:25,600 (trainer:732) INFO: 16epoch:train:5901-6000batch: iter_time=9.151e-05, forward_time=0.107, loss_ctc=69.337, loss_att=47.625, acc=0.723, loss=54.139, backward_time=0.753, grad_norm=82.017, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.254e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 08:39:05,439 (trainer:732) INFO: 16epoch:train:6001-6100batch: iter_time=9.170e-05, forward_time=0.107, loss_ctc=65.149, loss_att=50.375, acc=0.696, loss=54.807, backward_time=0.752, grad_norm=92.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.251e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:40:45,317 (trainer:732) INFO: 16epoch:train:6101-6200batch: iter_time=1.040e-04, 
forward_time=0.107, loss_ctc=70.697, loss_att=51.205, acc=0.700, loss=57.052, backward_time=0.753, grad_norm=86.258, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.248e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:42:24,960 (trainer:732) INFO: 16epoch:train:6201-6300batch: iter_time=9.827e-05, forward_time=0.108, loss_ctc=64.757, loss_att=50.584, acc=0.702, loss=54.836, backward_time=0.751, grad_norm=79.471, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.245e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:44:04,631 (trainer:732) INFO: 16epoch:train:6301-6400batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=72.490, loss_att=54.788, acc=0.696, loss=60.098, backward_time=0.751, grad_norm=92.689, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.242e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:45:44,133 (trainer:732) INFO: 16epoch:train:6401-6500batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=85.155, loss_att=58.768, acc=0.706, loss=66.684, backward_time=0.749, grad_norm=98.779, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.239e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 08:47:23,850 (trainer:732) INFO: 16epoch:train:6501-6600batch: iter_time=1.083e-04, forward_time=0.109, loss_ctc=73.711, loss_att=60.604, acc=0.688, loss=64.536, backward_time=0.751, grad_norm=84.467, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.235e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 08:48:30,857 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 08:48:50,019 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:48:53,511 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:48:53,512 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 08:48:53,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:53:20,677 (trainer:732) INFO: 16epoch:train:6601-6700batch: iter_time=1.289, forward_time=0.108, loss_ctc=74.070, loss_att=54.401, acc=0.697, loss=60.302, backward_time=0.761, grad_norm=86.767, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.232e-05, train_time=7.136 +[gpua003:0/64] 2023-07-06 08:55:01,310 (trainer:732) INFO: 16epoch:train:6701-6800batch: iter_time=1.138e-04, forward_time=0.107, loss_ctc=71.082, loss_att=51.771, acc=0.709, loss=57.564, backward_time=0.755, grad_norm=90.309, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.229e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 08:56:41,532 (trainer:732) INFO: 16epoch:train:6801-6900batch: iter_time=1.140e-04, forward_time=0.108, loss_ctc=65.828, loss_att=50.568, acc=0.693, loss=55.146, backward_time=0.752, grad_norm=92.849, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.226e-05, 
train_time=2.004 +[gpua003:0/64] 2023-07-06 08:58:21,807 (trainer:732) INFO: 16epoch:train:6901-7000batch: iter_time=1.105e-04, forward_time=0.107, loss_ctc=69.044, loss_att=51.957, acc=0.681, loss=57.083, backward_time=0.751, grad_norm=85.863, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.223e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 09:00:01,383 (trainer:732) INFO: 16epoch:train:7001-7100batch: iter_time=1.136e-04, forward_time=0.108, loss_ctc=66.173, loss_att=51.594, acc=0.692, loss=55.968, backward_time=0.751, grad_norm=107.161, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.220e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 09:01:41,306 (trainer:732) INFO: 16epoch:train:7101-7200batch: iter_time=1.196e-04, forward_time=0.108, loss_ctc=68.998, loss_att=50.099, acc=0.697, loss=55.769, backward_time=0.751, grad_norm=84.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.217e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:03:21,067 (trainer:732) INFO: 16epoch:train:7201-7300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=83.622, loss_att=62.831, acc=0.690, loss=69.069, backward_time=0.751, grad_norm=94.437, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.213e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:05:00,740 (trainer:732) INFO: 16epoch:train:7301-7400batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=76.063, loss_att=61.450, acc=0.677, loss=65.834, backward_time=0.751, grad_norm=89.194, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.210e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 09:06:40,479 (trainer:732) INFO: 16epoch:train:7401-7500batch: iter_time=1.026e-04, forward_time=0.108, loss_ctc=74.348, loss_att=63.331, acc=0.691, loss=66.636, backward_time=0.752, grad_norm=94.746, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.207e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:06:50,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 09:07:09,872 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 09:07:13,350 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 09:12:31,328 (trainer:732) INFO: 16epoch:train:7501-7600batch: iter_time=1.366, forward_time=0.109, loss_ctc=70.718, loss_att=46.675, acc=0.709, loss=53.888, backward_time=0.765, grad_norm=84.044, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.204e-05, train_time=7.017 +[gpua003:0/64] 2023-07-06 09:14:11,524 (trainer:732) INFO: 16epoch:train:7601-7700batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=71.556, loss_att=49.715, acc=0.711, loss=56.267, backward_time=0.752, grad_norm=84.595, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.201e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 09:15:55,971 (trainer:732) INFO: 16epoch:train:7701-7800batch: iter_time=1.048e-04, forward_time=0.110, loss_ctc=63.324, loss_att=50.467, acc=0.700, loss=54.324, backward_time=0.762, grad_norm=79.834, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.198e-05, train_time=2.089 +[gpua003:0/64] 2023-07-06 09:17:38,069 (trainer:732) INFO: 16epoch:train:7801-7900batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=74.072, loss_att=55.493, acc=0.692, loss=61.067, backward_time=0.764, grad_norm=89.346, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.195e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 09:19:24,407 (trainer:732) INFO: 16epoch:train:7901-8000batch: iter_time=1.124e-04, forward_time=0.109, loss_ctc=63.418, loss_att=47.702, acc=0.707, loss=52.417, backward_time=0.766, grad_norm=74.133, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.192e-05, train_time=2.127 +[gpua003:0/64] 2023-07-06 09:21:04,320 (trainer:732) INFO: 16epoch:train:8001-8100batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=80.173, loss_att=56.871, acc=0.698, loss=63.862, backward_time=0.751, grad_norm=90.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.189e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:22:51,435 (trainer:732) INFO: 16epoch:train:8101-8200batch: iter_time=1.128e-04, forward_time=0.109, loss_ctc=82.878, loss_att=61.648, acc=0.703, loss=68.017, backward_time=0.757, grad_norm=96.301, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.185e-05, train_time=2.142 +[gpua003:0/64] 2023-07-06 09:24:32,016 (trainer:732) INFO: 16epoch:train:8201-8300batch: iter_time=1.174e-04, forward_time=0.109, loss_ctc=70.061, loss_att=57.659, acc=0.693, loss=61.380, backward_time=0.750, grad_norm=90.693, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.182e-05, 
train_time=2.011 +[gpua003:0/64] 2023-07-06 09:25:08,632 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 09:25:27,808 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 09:25:31,298 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 09:30:43,049 (trainer:732) INFO: 16epoch:train:8301-8400batch: iter_time=1.323, forward_time=0.109, loss_ctc=78.228, loss_att=55.459, acc=0.696, loss=62.290, backward_time=0.766, grad_norm=102.805, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.179e-05, train_time=7.420 +[gpua003:0/64] 2023-07-06 09:32:24,140 (trainer:732) INFO: 16epoch:train:8401-8500batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=65.900, loss_att=49.152, acc=0.721, loss=54.176, backward_time=0.753, grad_norm=86.699, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.176e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 09:34:03,912 (trainer:732) INFO: 16epoch:train:8501-8600batch: iter_time=9.517e-05, forward_time=0.107, loss_ctc=64.248, loss_att=49.488, acc=0.696, loss=53.916, backward_time=0.752, grad_norm=92.458, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.173e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:35:44,278 (trainer:732) INFO: 16epoch:train:8601-8700batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=68.813, loss_att=50.173, acc=0.693, loss=55.765, backward_time=0.753, grad_norm=79.140, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.170e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 09:37:24,339 (trainer:732) INFO: 16epoch:train:8701-8800batch: iter_time=1.014e-04, forward_time=0.107, loss_ctc=63.927, loss_att=49.180, acc=0.703, loss=53.604, backward_time=0.752, grad_norm=74.079, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.167e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 09:39:04,178 (trainer:732) INFO: 16epoch:train:8801-8900batch: iter_time=9.886e-05, forward_time=0.107, loss_ctc=71.856, loss_att=54.471, acc=0.690, loss=59.686, backward_time=0.751, grad_norm=84.227, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.164e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 09:40:44,106 (trainer:732) INFO: 16epoch:train:8901-9000batch: iter_time=9.990e-05, forward_time=0.108, loss_ctc=83.183, loss_att=60.219, acc=0.697, loss=67.108, backward_time=0.752, grad_norm=105.843, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.161e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:42:23,729 (trainer:732) INFO: 16epoch:train:9001-9100batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=73.879, loss_att=62.193, acc=0.675, loss=65.699, 
backward_time=0.750, grad_norm=87.852, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.158e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 09:43:30,780 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 09:43:50,013 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 09:43:53,967 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 09:47:02,373 (trainer:732) INFO: 16epoch:train:9101-9200batch: iter_time=1.335, forward_time=0.108, loss_ctc=73.193, loss_att=55.038, acc=0.695, loss=60.485, backward_time=0.764, grad_norm=100.991, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.155e-05, train_time=5.573 +[gpua003:0/64] 2023-07-06 09:48:42,805 (trainer:732) INFO: 16epoch:train:9201-9300batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=70.916, loss_att=49.501, acc=0.717, loss=55.926, backward_time=0.755, grad_norm=111.947, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.152e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 09:50:23,695 (trainer:732) INFO: 16epoch:train:9301-9400batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=65.130, loss_att=48.335, acc=0.700, loss=53.374, backward_time=0.753, grad_norm=76.035, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.148e-05, train_time=2.018 +[gpua003:0/64] 2023-07-06 09:52:03,613 (trainer:732) INFO: 16epoch:train:9401-9500batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.151, loss_att=50.398, acc=0.699, loss=56.024, backward_time=0.752, grad_norm=82.318, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.111, optim0_lr0=9.145e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:53:43,866 (trainer:732) INFO: 16epoch:train:9501-9600batch: iter_time=9.826e-05, forward_time=0.108, loss_ctc=64.096, loss_att=50.574, acc=0.700, loss=54.631, backward_time=0.752, grad_norm=76.082, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.142e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 09:55:23,809 (trainer:732) INFO: 16epoch:train:9601-9700batch: iter_time=1.055e-04, forward_time=0.107, loss_ctc=68.502, loss_att=50.633, acc=0.705, loss=55.994, backward_time=0.752, grad_norm=90.453, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.139e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 09:57:03,795 (trainer:732) INFO: 16epoch:train:9701-9800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=83.554, loss_att=60.263, acc=0.706, loss=67.250, backward_time=0.752, grad_norm=121.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.136e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 09:58:43,540 (trainer:732) INFO: 
16epoch:train:9801-9900batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=75.767, loss_att=61.630, acc=0.686, loss=65.871, backward_time=0.752, grad_norm=99.766, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.133e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 10:00:23,449 (trainer:732) INFO: 16epoch:train:9901-10000batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=73.492, loss_att=62.177, acc=0.702, loss=65.572, backward_time=0.752, grad_norm=87.104, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.130e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 10:13:27,455 (trainer:338) INFO: 16epoch results: [train] iter_time=0.188, forward_time=0.109, loss_ctc=72.648, loss_att=54.689, acc=0.694, loss=60.077, backward_time=0.755, grad_norm=91.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.286e-05, train_time=2.587, time=3 hours, 35 minutes and 51.24 seconds, total_count=130000, gpu_max_cached_mem_GB=37.775, [valid] loss_ctc=53.067, cer_ctc=0.290, loss_att=44.062, acc=0.664, cer=0.358, wer=0.991, loss=46.763, time=6 minutes and 19.52 seconds, total_count=13662, gpu_max_cached_mem_GB=37.775, [att_plot] time=6 minutes and 30.6 seconds, total_count=0, gpu_max_cached_mem_GB=37.775 +[gpua003:0/64] 2023-07-06 10:13:43,422 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/11epoch.pth +[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:272) INFO: 17/100epoch started. Estimated time to finish: 1 week, 6 days and 10 hours +[gpua003:0/64] 2023-07-06 10:13:43,501 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpua003:0/64] 2023-07-06 10:14:02,307 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 10:14:05,803 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 10:14:05,804 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 10:14:05,819 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 10:20:06,308 (trainer:732) INFO: 17epoch:train:1-100batch: iter_time=2.583, forward_time=0.213, loss_ctc=70.673, loss_att=58.961, acc=0.684, loss=62.475, backward_time=0.820, grad_norm=95.879, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.123, optim0_lr0=9.127e-05, train_time=7.656 +[gpua003:0/64] 2023-07-06 10:21:47,983 (trainer:732) INFO: 17epoch:train:101-200batch: iter_time=1.078e-04, forward_time=0.110, loss_ctc=72.857, loss_att=65.701, acc=0.683, loss=67.848, backward_time=0.753, grad_norm=87.783, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.124e-05, train_time=2.034 +[gpua003:0/64] 2023-07-06 10:23:43,718 (trainer:732) INFO: 17epoch:train:201-300batch: iter_time=3.269e-04, forward_time=0.202, loss_ctc=74.298, loss_att=62.216, acc=0.684, loss=65.841, backward_time=0.776, grad_norm=85.905, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.119, optim0_lr0=9.121e-05, train_time=2.314 +[gpua003:0/64] 2023-07-06 10:25:26,949 (trainer:732) INFO: 17epoch:train:301-400batch: iter_time=9.790e-05, forward_time=0.108, loss_ctc=72.162, loss_att=54.531, acc=0.685, loss=59.820, backward_time=0.756, grad_norm=86.861, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.118e-05, train_time=2.064 +[gpua003:0/64] 2023-07-06 10:27:13,540 (trainer:732) INFO: 17epoch:train:401-500batch: iter_time=1.134e-04, forward_time=0.120, loss_ctc=74.989, loss_att=52.743, acc=0.701, loss=59.416, backward_time=0.763, grad_norm=88.929, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.115e-05, train_time=2.132 +[gpua003:0/64] 2023-07-06 10:29:07,314 (trainer:732) INFO: 17epoch:train:501-600batch: iter_time=0.006, forward_time=0.165, loss_ctc=74.139, loss_att=57.626, acc=0.683, loss=62.580, backward_time=0.774, grad_norm=89.735, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.116, optim0_lr0=9.112e-05, train_time=2.275 +[gpua003:0/64] 2023-07-06 10:31:07,776 (trainer:732) INFO: 17epoch:train:601-700batch: iter_time=9.955e-05, forward_time=0.110, loss_ctc=75.682, loss_att=59.754, acc=0.684, loss=64.533, backward_time=0.796, grad_norm=95.065, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.109e-05, train_time=2.409 +[gpua003:0/64] 2023-07-06 10:33:08,425 (trainer:732) INFO: 17epoch:train:701-800batch: iter_time=0.001, forward_time=0.197, loss_ctc=73.468, loss_att=56.545, acc=0.693, loss=61.622, backward_time=0.799, grad_norm=89.781, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.106e-05, train_time=2.411 
+[gpua003:0/64] 2023-07-06 10:33:52,355 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 10:34:11,454 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 10:34:14,928 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 10:38:42,065 (trainer:732) INFO: 17epoch:train:801-900batch: iter_time=1.652, forward_time=0.108, loss_ctc=78.264, loss_att=59.917, acc=0.689, loss=65.421, backward_time=0.771, grad_norm=105.575, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.103e-05, train_time=6.674 +[gpua003:0/64] 2023-07-06 10:40:21,942 (trainer:732) INFO: 17epoch:train:901-1000batch: iter_time=9.108e-05, forward_time=0.107, loss_ctc=68.668, loss_att=64.753, acc=0.669, loss=65.927, backward_time=0.750, grad_norm=89.386, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.100e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 10:42:01,601 (trainer:732) INFO: 17epoch:train:1001-1100batch: iter_time=9.405e-05, forward_time=0.107, loss_ctc=73.564, loss_att=60.516, acc=0.685, loss=64.430, backward_time=0.750, grad_norm=93.150, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.097e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 10:43:41,772 (trainer:732) INFO: 17epoch:train:1101-1200batch: iter_time=9.308e-05, forward_time=0.107, loss_ctc=70.639, loss_att=56.045, acc=0.686, loss=60.423, backward_time=0.751, grad_norm=87.677, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.094e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 10:45:21,649 (trainer:732) INFO: 17epoch:train:1201-1300batch: iter_time=9.487e-05, forward_time=0.107, loss_ctc=71.051, loss_att=55.501, acc=0.686, loss=60.166, backward_time=0.752, grad_norm=91.712, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.091e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 10:47:01,483 (trainer:732) INFO: 17epoch:train:1301-1400batch: iter_time=9.825e-05, forward_time=0.108, loss_ctc=78.942, loss_att=56.523, acc=0.684, loss=63.249, backward_time=0.752, grad_norm=90.368, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.088e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 10:48:41,812 (trainer:732) INFO: 17epoch:train:1401-1500batch: iter_time=1.015e-04, forward_time=0.114, loss_ctc=76.055, loss_att=56.659, acc=0.684, loss=62.478, backward_time=0.752, grad_norm=88.888, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.085e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 10:50:21,503 (trainer:732) INFO: 17epoch:train:1501-1600batch: iter_time=1.011e-04, forward_time=0.106, loss_ctc=72.092, loss_att=59.329, acc=0.681, loss=63.157, backward_time=0.751, 
grad_norm=83.857, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.082e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 10:51:28,539 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 10:51:47,761 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 10:51:51,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 10:51:51,244 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 10:51:51,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 10:55:23,750 (trainer:732) INFO: 17epoch:train:1601-1700batch: iter_time=1.339, forward_time=0.136, loss_ctc=75.727, loss_att=56.568, acc=0.694, loss=62.316, backward_time=0.762, grad_norm=98.984, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.114, optim0_lr0=9.079e-05, train_time=6.043 +[gpua003:0/64] 2023-07-06 10:57:04,140 (trainer:732) INFO: 17epoch:train:1701-1800batch: iter_time=1.070e-04, forward_time=0.110, loss_ctc=65.293, loss_att=58.713, acc=0.683, loss=60.687, backward_time=0.755, grad_norm=89.446, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.076e-05, train_time=2.009 +[gpua003:0/64] 2023-07-06 10:58:44,084 (trainer:732) INFO: 17epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=74.486, loss_att=67.076, acc=0.685, loss=69.299, backward_time=0.753, grad_norm=99.307, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.073e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 11:00:24,124 (trainer:732) INFO: 17epoch:train:1901-2000batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=74.341, loss_att=60.362, acc=0.692, loss=64.556, backward_time=0.753, grad_norm=87.026, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.070e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 11:02:04,044 (trainer:732) INFO: 17epoch:train:2001-2100batch: iter_time=1.014e-04, forward_time=0.108, loss_ctc=66.178, loss_att=49.767, acc=0.697, loss=54.690, backward_time=0.753, grad_norm=86.555, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.067e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 11:03:43,843 (trainer:732) INFO: 17epoch:train:2101-2200batch: iter_time=9.901e-05, forward_time=0.109, loss_ctc=77.231, loss_att=58.176, acc=0.697, loss=63.892, backward_time=0.752, grad_norm=93.954, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.064e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:05:23,555 (trainer:732) INFO: 17epoch:train:2201-2300batch: iter_time=9.750e-05, forward_time=0.108, loss_ctc=75.802, loss_att=57.441, acc=0.693, loss=62.949, backward_time=0.751, grad_norm=89.231, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.061e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:07:03,243 (trainer:732) INFO: 17epoch:train:2301-2400batch: 
iter_time=1.062e-04, forward_time=0.108, loss_ctc=70.913, loss_att=52.438, acc=0.689, loss=57.980, backward_time=0.751, grad_norm=110.095, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.058e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:09:01,745 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 11:09:21,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 11:09:24,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 11:12:11,094 (trainer:732) INFO: 17epoch:train:2401-2500batch: iter_time=1.565, forward_time=0.109, loss_ctc=75.267, loss_att=58.496, acc=0.696, loss=63.527, backward_time=0.755, grad_norm=104.336, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.055e-05, train_time=6.157 +[gpua003:0/64] 2023-07-06 11:13:53,368 (trainer:732) INFO: 17epoch:train:2501-2600batch: iter_time=1.102e-04, forward_time=0.108, loss_ctc=70.363, loss_att=57.552, acc=0.686, loss=61.395, backward_time=0.759, grad_norm=92.372, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.052e-05, train_time=2.045 +[gpua003:0/64] 2023-07-06 11:15:33,128 (trainer:732) INFO: 17epoch:train:2601-2700batch: iter_time=1.054e-04, forward_time=0.108, loss_ctc=71.407, loss_att=65.980, acc=0.682, loss=67.608, backward_time=0.750, grad_norm=88.759, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.049e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 11:17:13,072 (trainer:732) INFO: 17epoch:train:2701-2800batch: iter_time=1.124e-04, forward_time=0.108, loss_ctc=73.031, loss_att=60.033, acc=0.686, loss=63.933, backward_time=0.751, grad_norm=87.774, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.046e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 11:18:52,468 (trainer:732) INFO: 17epoch:train:2801-2900batch: iter_time=1.132e-04, forward_time=0.106, loss_ctc=70.091, loss_att=54.398, acc=0.683, loss=59.106, backward_time=0.749, grad_norm=91.603, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.043e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 11:20:32,129 (trainer:732) INFO: 17epoch:train:2901-3000batch: iter_time=1.165e-04, forward_time=0.108, loss_ctc=74.247, loss_att=52.240, acc=0.696, loss=58.842, backward_time=0.750, grad_norm=94.573, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.040e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 11:22:11,683 (trainer:732) INFO: 17epoch:train:3001-3100batch: iter_time=1.201e-04, forward_time=0.107, loss_ctc=73.832, loss_att=58.188, acc=0.682, loss=62.881, backward_time=0.749, grad_norm=91.783, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, 
optim0_lr0=9.037e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 11:23:51,407 (trainer:732) INFO: 17epoch:train:3101-3200batch: iter_time=1.178e-04, forward_time=0.108, loss_ctc=72.306, loss_att=57.425, acc=0.683, loss=61.889, backward_time=0.751, grad_norm=96.037, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.034e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:25:30,906 (trainer:732) INFO: 17epoch:train:3201-3300batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=73.002, loss_att=56.253, acc=0.694, loss=61.278, backward_time=0.750, grad_norm=94.698, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.031e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 11:26:04,198 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 11:26:23,282 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 11:26:26,821 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 11:32:52,064 (trainer:732) INFO: 17epoch:train:3301-3400batch: iter_time=1.297, forward_time=0.107, loss_ctc=74.660, loss_att=57.798, acc=0.692, loss=62.857, backward_time=0.766, grad_norm=124.515, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.028e-05, train_time=8.823 +[gpua003:0/64] 2023-07-06 11:34:32,228 (trainer:732) INFO: 17epoch:train:3401-3500batch: iter_time=1.056e-04, forward_time=0.108, loss_ctc=67.842, loss_att=63.769, acc=0.687, loss=64.991, backward_time=0.753, grad_norm=85.874, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.025e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 11:36:12,063 (trainer:732) INFO: 17epoch:train:3501-3600batch: iter_time=9.523e-05, forward_time=0.108, loss_ctc=71.756, loss_att=59.431, acc=0.693, loss=63.129, backward_time=0.752, grad_norm=81.853, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.022e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:37:52,081 (trainer:732) INFO: 17epoch:train:3601-3700batch: iter_time=8.824e-05, forward_time=0.108, loss_ctc=71.855, loss_att=55.535, acc=0.697, loss=60.431, backward_time=0.752, grad_norm=82.300, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.020e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 11:39:37,163 (trainer:732) INFO: 17epoch:train:3701-3800batch: iter_time=9.169e-05, forward_time=0.108, loss_ctc=69.667, loss_att=54.813, acc=0.695, loss=59.269, backward_time=0.759, grad_norm=90.155, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.017e-05, train_time=2.101 +[gpua003:0/64] 2023-07-06 11:41:16,991 (trainer:732) INFO: 17epoch:train:3801-3900batch: iter_time=9.824e-05, forward_time=0.107, loss_ctc=77.697, loss_att=55.817, 
acc=0.700, loss=62.381, backward_time=0.751, grad_norm=89.894, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.014e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:42:56,825 (trainer:732) INFO: 17epoch:train:3901-4000batch: iter_time=9.875e-05, forward_time=0.107, loss_ctc=73.227, loss_att=54.909, acc=0.698, loss=60.404, backward_time=0.751, grad_norm=98.795, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.011e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:44:38,362 (trainer:732) INFO: 17epoch:train:4001-4100batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.543, loss_att=58.175, acc=0.689, loss=62.485, backward_time=0.755, grad_norm=89.796, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.008e-05, train_time=2.031 +[gpua003:0/64] 2023-07-06 11:45:44,663 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 11:46:03,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 11:46:07,369 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 11:51:11,190 (trainer:732) INFO: 17epoch:train:4101-4200batch: iter_time=1.302, forward_time=0.107, loss_ctc=75.093, loss_att=55.212, acc=0.696, loss=61.176, backward_time=0.773, grad_norm=88.273, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.005e-05, train_time=7.856 +[gpua003:0/64] 2023-07-06 11:52:51,935 (trainer:732) INFO: 17epoch:train:4201-4300batch: iter_time=9.852e-05, forward_time=0.108, loss_ctc=66.394, loss_att=59.086, acc=0.681, loss=61.279, backward_time=0.754, grad_norm=84.241, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.002e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 11:54:31,640 (trainer:732) INFO: 17epoch:train:4301-4400batch: iter_time=9.413e-05, forward_time=0.107, loss_ctc=71.953, loss_att=65.497, acc=0.672, loss=67.434, backward_time=0.751, grad_norm=102.244, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.999e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 11:56:11,417 (trainer:732) INFO: 17epoch:train:4401-4500batch: iter_time=9.684e-05, forward_time=0.107, loss_ctc=72.922, loss_att=58.511, acc=0.684, loss=62.834, backward_time=0.751, grad_norm=97.141, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.996e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 11:57:51,231 (trainer:732) INFO: 17epoch:train:4501-4600batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=70.213, loss_att=52.171, acc=0.696, loss=57.583, backward_time=0.751, grad_norm=81.295, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.993e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 11:59:30,948 
(trainer:732) INFO: 17epoch:train:4601-4700batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=80.215, loss_att=60.929, acc=0.678, loss=66.715, backward_time=0.751, grad_norm=101.501, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.990e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 12:01:10,656 (trainer:732) INFO: 17epoch:train:4701-4800batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=72.860, loss_att=54.463, acc=0.685, loss=59.982, backward_time=0.750, grad_norm=86.781, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.987e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 12:02:50,263 (trainer:732) INFO: 17epoch:train:4801-4900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.023, loss_att=55.370, acc=0.691, loss=60.366, backward_time=0.751, grad_norm=80.330, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.985e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 12:04:30,172 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 12:04:49,660 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 12:04:53,232 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 12:09:03,264 (trainer:732) INFO: 17epoch:train:4901-5000batch: iter_time=2.218, forward_time=0.107, loss_ctc=69.690, loss_att=56.365, acc=0.679, loss=60.363, backward_time=0.760, grad_norm=86.852, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.982e-05, train_time=7.460 +[gpua003:0/64] 2023-07-06 12:10:45,243 (trainer:732) INFO: 17epoch:train:5001-5100batch: iter_time=9.156e-05, forward_time=0.108, loss_ctc=69.222, loss_att=56.473, acc=0.696, loss=60.298, backward_time=0.760, grad_norm=92.071, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.979e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 12:12:25,764 (trainer:732) INFO: 17epoch:train:5101-5200batch: iter_time=1.022e-04, forward_time=0.108, loss_ctc=70.202, loss_att=64.051, acc=0.686, loss=65.896, backward_time=0.754, grad_norm=83.538, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.976e-05, train_time=2.010 +[gpua003:0/64] 2023-07-06 12:14:28,497 (trainer:732) INFO: 17epoch:train:5201-5300batch: iter_time=1.115e-04, forward_time=0.108, loss_ctc=72.332, loss_att=60.244, acc=0.687, loss=63.870, backward_time=0.813, grad_norm=91.528, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.973e-05, train_time=2.454 +[gpua003:0/64] 2023-07-06 12:16:08,436 (trainer:732) INFO: 17epoch:train:5301-5400batch: iter_time=8.814e-05, forward_time=0.109, loss_ctc=69.588, loss_att=52.565, acc=0.687, loss=57.672, backward_time=0.753, grad_norm=77.684, clip=100.000, 
loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.970e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 12:17:52,763 (trainer:732) INFO: 17epoch:train:5401-5500batch: iter_time=8.935e-05, forward_time=0.109, loss_ctc=73.912, loss_att=52.422, acc=0.696, loss=58.869, backward_time=0.776, grad_norm=84.516, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.967e-05, train_time=2.086 +[gpua003:0/64] 2023-07-06 12:19:32,754 (trainer:732) INFO: 17epoch:train:5501-5600batch: iter_time=8.392e-05, forward_time=0.108, loss_ctc=73.547, loss_att=57.855, acc=0.682, loss=62.562, backward_time=0.753, grad_norm=96.965, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.964e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 12:21:12,375 (trainer:732) INFO: 17epoch:train:5601-5700batch: iter_time=1.111e-04, forward_time=0.108, loss_ctc=71.195, loss_att=56.412, acc=0.687, loss=60.847, backward_time=0.751, grad_norm=116.013, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.961e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 12:22:52,067 (trainer:732) INFO: 17epoch:train:5701-5800batch: iter_time=8.850e-05, forward_time=0.108, loss_ctc=71.813, loss_att=55.666, acc=0.695, loss=60.510, backward_time=0.751, grad_norm=115.475, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.959e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 12:23:25,320 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 12:23:44,500 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 12:23:48,068 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 12:29:41,148 (trainer:732) INFO: 17epoch:train:5801-5900batch: iter_time=1.954, forward_time=0.110, loss_ctc=75.608, loss_att=57.604, acc=0.698, loss=63.005, backward_time=0.765, grad_norm=95.989, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.956e-05, train_time=8.181 +[gpua003:0/64] 2023-07-06 12:31:32,248 (trainer:732) INFO: 17epoch:train:5901-6000batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=65.942, loss_att=62.877, acc=0.691, loss=63.796, backward_time=0.780, grad_norm=90.009, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.953e-05, train_time=2.222 +[gpua003:0/64] 2023-07-06 12:33:12,332 (trainer:732) INFO: 17epoch:train:6001-6100batch: iter_time=1.069e-04, forward_time=0.110, loss_ctc=71.278, loss_att=58.934, acc=0.693, loss=62.637, backward_time=0.753, grad_norm=83.756, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.950e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 12:34:52,381 (trainer:732) INFO: 17epoch:train:6101-6200batch: iter_time=1.058e-04, 
forward_time=0.110, loss_ctc=70.544, loss_att=53.727, acc=0.704, loss=58.772, backward_time=0.753, grad_norm=79.862, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.947e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 12:36:34,193 (trainer:732) INFO: 17epoch:train:6201-6300batch: iter_time=1.036e-04, forward_time=0.110, loss_ctc=69.284, loss_att=54.871, acc=0.698, loss=59.195, backward_time=0.759, grad_norm=80.439, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.944e-05, train_time=2.036 +[gpua003:0/64] 2023-07-06 12:38:16,533 (trainer:732) INFO: 17epoch:train:6301-6400batch: iter_time=1.054e-04, forward_time=0.110, loss_ctc=76.122, loss_att=54.596, acc=0.703, loss=61.054, backward_time=0.756, grad_norm=92.990, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.941e-05, train_time=2.047 +[gpua003:0/64] 2023-07-06 12:40:03,767 (trainer:732) INFO: 17epoch:train:6401-6500batch: iter_time=1.067e-04, forward_time=0.109, loss_ctc=72.416, loss_att=54.767, acc=0.698, loss=60.062, backward_time=0.760, grad_norm=82.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.938e-05, train_time=2.144 +[gpua003:0/64] 2023-07-06 12:41:43,845 (trainer:732) INFO: 17epoch:train:6501-6600batch: iter_time=1.023e-04, forward_time=0.109, loss_ctc=69.309, loss_att=56.242, acc=0.693, loss=60.162, backward_time=0.752, grad_norm=84.956, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.936e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 12:42:53,074 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 12:43:12,502 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 12:43:16,058 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 12:48:47,376 (trainer:732) INFO: 17epoch:train:6601-6700batch: iter_time=1.326, forward_time=0.109, loss_ctc=73.679, loss_att=54.146, acc=0.702, loss=60.006, backward_time=0.774, grad_norm=87.071, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.933e-05, train_time=8.470 +[gpua003:0/64] 2023-07-06 12:50:28,471 (trainer:732) INFO: 17epoch:train:6701-6800batch: iter_time=9.504e-05, forward_time=0.109, loss_ctc=66.009, loss_att=56.529, acc=0.690, loss=59.373, backward_time=0.754, grad_norm=86.611, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.930e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 12:52:08,406 (trainer:732) INFO: 17epoch:train:6801-6900batch: iter_time=9.435e-05, forward_time=0.109, loss_ctc=71.563, loss_att=64.197, acc=0.675, loss=66.407, backward_time=0.752, grad_norm=91.386, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.927e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-06 12:53:48,230 (trainer:732) INFO: 17epoch:train:6901-7000batch: iter_time=1.083e-04, forward_time=0.108, loss_ctc=70.739, loss_att=56.404, acc=0.690, loss=60.704, backward_time=0.752, grad_norm=77.527, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.924e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 12:55:28,052 (trainer:732) INFO: 17epoch:train:7001-7100batch: iter_time=1.054e-04, forward_time=0.109, loss_ctc=67.590, loss_att=51.543, acc=0.698, loss=56.357, backward_time=0.752, grad_norm=86.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.921e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 12:57:07,643 (trainer:732) INFO: 17epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.108, loss_ctc=78.963, loss_att=61.659, acc=0.674, loss=66.850, backward_time=0.751, grad_norm=88.590, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.919e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 12:58:47,336 (trainer:732) INFO: 17epoch:train:7201-7300batch: iter_time=1.007e-04, forward_time=0.109, loss_ctc=71.829, loss_att=53.054, acc=0.688, loss=58.686, backward_time=0.752, grad_norm=99.178, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.916e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 13:00:27,154 (trainer:732) INFO: 17epoch:train:7301-7400batch: iter_time=9.624e-05, forward_time=0.110, loss_ctc=70.807, loss_att=54.983, acc=0.694, loss=59.730, backward_time=0.752, grad_norm=96.232, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.913e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 13:02:07,410 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua003:0/64] 2023-07-06 13:02:26,581 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 13:02:30,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 13:06:49,822 (trainer:732) INFO: 17epoch:train:7401-7500batch: iter_time=1.317, forward_time=0.107, loss_ctc=69.012, loss_att=56.036, acc=0.683, loss=59.929, backward_time=0.766, grad_norm=92.081, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.910e-05, train_time=7.653 +[gpua003:0/64] 2023-07-06 13:08:33,082 (trainer:732) INFO: 17epoch:train:7501-7600batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=70.374, loss_att=56.800, acc=0.695, loss=60.872, backward_time=0.760, grad_norm=93.389, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.907e-05, train_time=2.065 +[gpua003:0/64] 2023-07-06 13:10:13,369 (trainer:732) INFO: 17epoch:train:7601-7700batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=71.632, loss_att=65.224, acc=0.684, loss=67.146, 
backward_time=0.753, grad_norm=93.615, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.904e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 13:11:53,366 (trainer:732) INFO: 17epoch:train:7701-7800batch: iter_time=1.042e-04, forward_time=0.108, loss_ctc=71.948, loss_att=59.571, acc=0.687, loss=63.284, backward_time=0.753, grad_norm=89.530, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.902e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 13:13:33,022 (trainer:732) INFO: 17epoch:train:7801-7900batch: iter_time=1.008e-04, forward_time=0.106, loss_ctc=71.162, loss_att=53.846, acc=0.687, loss=59.041, backward_time=0.751, grad_norm=93.637, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.899e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:15:12,530 (trainer:732) INFO: 17epoch:train:7901-8000batch: iter_time=1.035e-04, forward_time=0.106, loss_ctc=73.768, loss_att=51.961, acc=0.697, loss=58.503, backward_time=0.750, grad_norm=93.453, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.896e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 13:16:52,321 (trainer:732) INFO: 17epoch:train:8001-8100batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=72.484, loss_att=57.703, acc=0.684, loss=62.137, backward_time=0.751, grad_norm=92.203, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.893e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 13:18:31,917 (trainer:732) INFO: 17epoch:train:8101-8200batch: iter_time=1.031e-04, forward_time=0.106, loss_ctc=69.942, loss_att=55.765, acc=0.690, loss=60.018, backward_time=0.751, grad_norm=85.911, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.890e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 13:20:16,333 (trainer:732) INFO: 17epoch:train:8201-8300batch: iter_time=1.102e-04, forward_time=0.113, loss_ctc=69.944, loss_att=53.888, acc=0.702, loss=58.705, backward_time=0.762, grad_norm=93.148, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.887e-05, train_time=2.088 +[gpua003:0/64] 2023-07-06 13:20:50,086 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpua003:0/64] 2023-07-06 13:21:09,066 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 13:21:12,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 13:25:44,808 (trainer:732) INFO: 17epoch:train:8301-8400batch: iter_time=1.287, forward_time=0.108, loss_ctc=72.637, loss_att=55.648, acc=0.694, loss=60.745, backward_time=0.776, grad_norm=93.234, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.885e-05, train_time=6.569
+[gpua003:0/64] 2023-07-06 13:27:26,151 (trainer:732) INFO: 17epoch:train:8401-8500batch: iter_time=9.020e-05, forward_time=0.108, loss_ctc=71.200, loss_att=66.006, acc=0.691, loss=67.564, backward_time=0.752, grad_norm=84.887, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.882e-05, train_time=2.027
+[gpua003:0/64] 2023-07-06 13:29:06,443 (trainer:732) INFO: 17epoch:train:8501-8600batch: iter_time=9.487e-05, forward_time=0.108, loss_ctc=69.241, loss_att=57.750, acc=0.691, loss=61.197, backward_time=0.752, grad_norm=80.923, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.879e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 13:30:46,540 (trainer:732) INFO: 17epoch:train:8601-8700batch: iter_time=9.387e-05, forward_time=0.108, loss_ctc=71.052, loss_att=54.608, acc=0.696, loss=59.541, backward_time=0.752, grad_norm=81.533, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.876e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 13:32:26,237 (trainer:732) INFO: 17epoch:train:8701-8800batch: iter_time=8.946e-05, forward_time=0.107, loss_ctc=68.519, loss_att=53.218, acc=0.703, loss=57.809, backward_time=0.751, grad_norm=87.296, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.873e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 13:34:06,088 (trainer:732) INFO: 17epoch:train:8801-8900batch: iter_time=9.110e-05, forward_time=0.108, loss_ctc=75.321, loss_att=56.064, acc=0.703, loss=61.841, backward_time=0.752, grad_norm=83.253, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.871e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 13:35:45,773 (trainer:732) INFO: 17epoch:train:8901-9000batch: iter_time=8.413e-05, forward_time=0.107, loss_ctc=71.840, loss_att=52.405, acc=0.690, loss=58.236, backward_time=0.751, grad_norm=98.759, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.868e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 13:37:25,621 (trainer:732) INFO: 17epoch:train:9001-9100batch: iter_time=8.465e-05, forward_time=0.107, loss_ctc=67.828, loss_att=57.185, acc=0.695, loss=60.378, backward_time=0.753, grad_norm=87.054, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.865e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 13:38:32,627 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 13:38:52,045 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 13:38:55,594 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 13:43:25,159 (trainer:732) INFO: 17epoch:train:9101-9200batch: iter_time=1.294, forward_time=0.108, loss_ctc=73.928, loss_att=56.389, acc=0.694, loss=61.651, backward_time=0.764, grad_norm=128.846, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.862e-05, train_time=7.191
+[gpua003:0/64] 2023-07-06 13:45:08,132 (trainer:732) INFO: 17epoch:train:9201-9300batch: iter_time=9.071e-05, forward_time=0.106, loss_ctc=66.150, loss_att=58.093, acc=0.695, loss=60.510, backward_time=0.763, grad_norm=85.331, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.860e-05, train_time=2.059
+[gpua003:0/64] 2023-07-06 13:46:48,580 (trainer:732) INFO: 17epoch:train:9301-9400batch: iter_time=9.128e-05, forward_time=0.106, loss_ctc=69.745, loss_att=63.280, acc=0.688, loss=65.219, backward_time=0.751, grad_norm=101.929, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.857e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 13:48:37,657 (trainer:732) INFO: 17epoch:train:9401-9500batch: iter_time=8.943e-05, forward_time=0.106, loss_ctc=70.169, loss_att=55.042, acc=0.701, loss=59.580, backward_time=0.766, grad_norm=83.719, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.854e-05, train_time=2.181
+[gpua003:0/64] 2023-07-06 13:50:17,323 (trainer:732) INFO: 17epoch:train:9501-9600batch: iter_time=9.534e-05, forward_time=0.106, loss_ctc=68.339, loss_att=52.207, acc=0.704, loss=57.047, backward_time=0.751, grad_norm=77.700, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.851e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 13:51:57,215 (trainer:732) INFO: 17epoch:train:9601-9700batch: iter_time=9.239e-05, forward_time=0.106, loss_ctc=78.747, loss_att=60.575, acc=0.695, loss=66.026, backward_time=0.751, grad_norm=98.273, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.848e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 13:53:36,907 (trainer:732) INFO: 17epoch:train:9701-9800batch: iter_time=9.160e-05, forward_time=0.107, loss_ctc=71.624, loss_att=54.061, acc=0.696, loss=59.330, backward_time=0.750, grad_norm=94.224, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.846e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 13:55:16,555 (trainer:732) INFO: 17epoch:train:9801-9900batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=70.198, loss_att=52.936, acc=0.705, loss=58.115, backward_time=0.751, grad_norm=84.449, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.843e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 13:56:56,201 (trainer:732) INFO: 17epoch:train:9901-10000batch: iter_time=8.856e-05, forward_time=0.107, loss_ctc=71.418, loss_att=58.211, acc=0.687, loss=62.173, backward_time=0.750, grad_norm=99.355, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.840e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 14:09:08,370 (trainer:338) INFO: 17epoch results: [train] iter_time=0.192, forward_time=0.112, loss_ctc=72.034, loss_att=57.326, acc=0.690, loss=61.738, backward_time=0.758, grad_norm=91.386, clip=100.000, loss_scale=2.342e+16, optim_step_time=0.113, optim0_lr0=8.981e-05, train_time=2.678, time=3 hours, 43 minutes and 23.37 seconds, total_count=140000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=58.336, cer_ctc=0.306, loss_att=49.239, acc=0.657, cer=0.353, wer=0.988, loss=51.968, time=5 minutes and 55.68 seconds, total_count=14674, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 5.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-06 14:09:27,436 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-06 14:09:27,626 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/12epoch.pth
+[gpua003:0/64] 2023-07-06 14:09:27,714 (trainer:272) INFO: 18/100epoch started. Estimated time to finish: 1 week, 6 days and 8 hours
+[gpua003:0/64] 2023-07-06 14:09:29,063 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 14:09:48,025 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 14:09:53,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 14:19:30,835 (trainer:732) INFO: 18epoch:train:1-100batch: iter_time=4.929, forward_time=0.153, loss_ctc=76.115, loss_att=63.275, acc=0.682, loss=67.127, backward_time=0.773, grad_norm=114.970, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.116, optim0_lr0=8.837e-05, train_time=12.048
+[gpua003:0/64] 2023-07-06 14:21:10,892 (trainer:732) INFO: 18epoch:train:101-200batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=78.407, loss_att=64.441, acc=0.685, loss=68.631, backward_time=0.752, grad_norm=128.032, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.835e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 14:22:53,009 (trainer:732) INFO: 18epoch:train:201-300batch: iter_time=9.913e-05, forward_time=0.108, loss_ctc=66.363, loss_att=49.349, acc=0.706, loss=54.453, backward_time=0.751, grad_norm=82.992, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.832e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 14:24:32,622 (trainer:732) INFO: 18epoch:train:301-400batch: iter_time=8.736e-05, forward_time=0.107, loss_ctc=85.395, loss_att=58.982, acc=0.692, loss=66.906, backward_time=0.752, grad_norm=105.306, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.829e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 14:26:14,069 (trainer:732) INFO: 18epoch:train:401-500batch: iter_time=8.796e-05, forward_time=0.107, loss_ctc=74.067, loss_att=58.678, acc=0.674, loss=63.295, backward_time=0.752, grad_norm=98.618, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.826e-05, train_time=2.029
+[gpua003:0/64] 2023-07-06 14:27:54,124 (trainer:732) INFO: 18epoch:train:501-600batch: iter_time=9.281e-05, forward_time=0.107, loss_ctc=85.710, loss_att=69.303, acc=0.665, loss=74.225, backward_time=0.753, grad_norm=104.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.824e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 14:29:33,936 (trainer:732) INFO: 18epoch:train:601-700batch: iter_time=8.948e-05, forward_time=0.107, loss_ctc=68.980, loss_att=52.042, acc=0.680, loss=57.123, backward_time=0.752, grad_norm=124.515, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.821e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 14:31:21,706 (trainer:732) INFO: 18epoch:train:701-800batch: iter_time=3.960e-04, forward_time=0.142, loss_ctc=86.717, loss_att=65.821, acc=0.661, loss=72.090, backward_time=0.767, grad_norm=105.973, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.818e-05, train_time=2.152
+[gpua003:0/64] 2023-07-06 14:32:09,857 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
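
The "Building Nth iter-factory..." lines above come from ESPnet's multiple-iterator mode (--multiple_iterator true over the 12-way splits12 shards): each epoch visits every shard once in a shuffled order, and each shard's dataset, sampler, and loader are built only when the trainer reaches it. That lazy construction is why the first 100-batch block after every build shows a large iter_time (e.g. 1.287 or 4.929 above) while steady-state blocks sit near iter_time=1e-04. A toy sketch of the idea, not ESPnet's MultipleIterFactory; build_loader is a hypothetical stand-in:

import random

def multiple_iter_factory(shards, epoch_seed):
    # Visit every shard exactly once per epoch, in a freshly shuffled order,
    # mirroring how split.7, split.1, split.5, ... each appear once per epoch.
    order = list(shards)
    random.Random(epoch_seed).shuffle(order)
    for i, shard in enumerate(order):
        print(f"Building {i}th iter-factory... ({shard})")
        # Built lazily here, so the first minibatch drawn from this shard
        # pays the construction cost that shows up as the iter_time spike.
        yield from build_loader(shard)

def build_loader(shard):
    # Hypothetical stand-in for the dataset + batch sampler + loader setup.
    return iter(())

for batch in multiple_iter_factory([f"split.{i}" for i in range(12)], epoch_seed=18):
    pass
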
+[gpua003:0/64] 2023-07-06 14:32:28,641 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 14:32:32,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 14:36:07,379 (trainer:732) INFO: 18epoch:train:801-900batch: iter_time=1.622, forward_time=0.152, loss_ctc=74.970, loss_att=58.171, acc=0.683, loss=63.211, backward_time=0.778, grad_norm=89.744, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.115, optim0_lr0=8.815e-05, train_time=5.717
+[gpua003:0/64] 2023-07-06 14:37:47,747 (trainer:732) INFO: 18epoch:train:901-1000batch: iter_time=9.918e-05, forward_time=0.108, loss_ctc=75.573, loss_att=63.432, acc=0.686, loss=67.074, backward_time=0.752, grad_norm=104.391, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.813e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 14:39:27,570 (trainer:732) INFO: 18epoch:train:1001-1100batch: iter_time=9.562e-05, forward_time=0.108, loss_ctc=71.935, loss_att=56.850, acc=0.696, loss=61.375, backward_time=0.751, grad_norm=93.037, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.810e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 14:41:07,498 (trainer:732) INFO: 18epoch:train:1101-1200batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=73.347, loss_att=52.973, acc=0.703, loss=59.086, backward_time=0.751, grad_norm=99.082, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.807e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 14:42:47,249 (trainer:732) INFO: 18epoch:train:1201-1300batch: iter_time=9.793e-05, forward_time=0.108, loss_ctc=83.048, loss_att=61.707, acc=0.686, loss=68.110, backward_time=0.750, grad_norm=102.821, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.804e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 14:44:27,022 (trainer:732) INFO: 18epoch:train:1301-1400batch: iter_time=1.028e-04, forward_time=0.108, loss_ctc=78.082, loss_att=59.145, acc=0.672, loss=64.826, backward_time=0.751, grad_norm=120.118, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.802e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 14:46:06,792 (trainer:732) INFO: 18epoch:train:1401-1500batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=77.767, loss_att=61.606, acc=0.674, loss=66.454, backward_time=0.752, grad_norm=122.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.799e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 14:47:46,697 (trainer:732) INFO: 18epoch:train:1501-1600batch: iter_time=9.485e-05, forward_time=0.110, loss_ctc=73.758, loss_att=56.709, acc=0.684, loss=61.824, backward_time=0.753, grad_norm=95.013, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.796e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 14:48:53,758 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 14:49:13,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 14:49:16,883 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 14:53:39,624 (trainer:732) INFO: 18epoch:train:1601-1700batch: iter_time=1.292, forward_time=0.109, loss_ctc=77.398, loss_att=58.944, acc=0.666, loss=64.480, backward_time=0.761, grad_norm=104.361, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.794e-05, train_time=7.058
+[gpua003:0/64] 2023-07-06 14:55:19,937 (trainer:732) INFO: 18epoch:train:1701-1800batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=72.292, loss_att=58.658, acc=0.692, loss=62.748, backward_time=0.754, grad_norm=90.979, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.791e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 14:57:00,068 (trainer:732) INFO: 18epoch:train:1801-1900batch: iter_time=9.482e-05, forward_time=0.108, loss_ctc=75.435, loss_att=59.110, acc=0.697, loss=64.008, backward_time=0.753, grad_norm=94.982, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.788e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 14:58:41,886 (trainer:732) INFO: 18epoch:train:1901-2000batch: iter_time=8.814e-05, forward_time=0.108, loss_ctc=66.463, loss_att=50.107, acc=0.714, loss=55.014, backward_time=0.753, grad_norm=94.405, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.785e-05, train_time=2.036
+[gpua003:0/64] 2023-07-06 15:00:21,511 (trainer:732) INFO: 18epoch:train:2001-2100batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=87.948, loss_att=64.112, acc=0.676, loss=71.263, backward_time=0.751, grad_norm=117.699, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.783e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 15:02:01,222 (trainer:732) INFO: 18epoch:train:2101-2200batch: iter_time=8.983e-05, forward_time=0.108, loss_ctc=78.447, loss_att=60.484, acc=0.681, loss=65.873, backward_time=0.752, grad_norm=100.916, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.780e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:03:48,764 (trainer:732) INFO: 18epoch:train:2201-2300batch: iter_time=9.119e-05, forward_time=0.107, loss_ctc=79.251, loss_att=62.785, acc=0.668, loss=67.725, backward_time=0.760, grad_norm=93.330, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.777e-05, train_time=2.151
+[gpua003:0/64] 2023-07-06 15:05:58,569 (trainer:732) INFO: 18epoch:train:2301-2400batch: iter_time=8.569e-05, forward_time=0.108, loss_ctc=66.068, loss_att=48.122, acc=0.690, loss=53.506, backward_time=0.810, grad_norm=98.868, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.775e-05, train_time=2.596
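
Throughout these blocks the logged loss is a weighted combination of loss_ctc and loss_att. The numbers are consistent with ESPnet's hybrid CTC/attention objective with ctc_weight = 0.3; that weight is inferred from the log, not read from the config, so treat this sketch as a consistency check rather than the training code:

def combined_loss(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    # loss = w * loss_ctc + (1 - w) * loss_att, the usual hybrid CTC/attention form.
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# 18epoch:train:2301-2400batch above: loss_ctc=66.068, loss_att=48.122, loss=53.506
assert round(combined_loss(66.068, 48.122), 3) == 53.506
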
+[gpua003:0/64] 2023-07-06 15:08:21,871 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 15:08:41,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 15:08:44,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 15:11:51,642 (trainer:732) INFO: 18epoch:train:2401-2500batch: iter_time=1.318, forward_time=0.127, loss_ctc=84.958, loss_att=62.041, acc=0.667, loss=68.916, backward_time=0.850, grad_norm=104.647, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.772e-05, train_time=7.061
+[gpua003:0/64] 2023-07-06 15:13:33,609 (trainer:732) INFO: 18epoch:train:2501-2600batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=75.155, loss_att=62.287, acc=0.691, loss=66.148, backward_time=0.764, grad_norm=96.943, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.769e-05, train_time=2.039
+[gpua003:0/64] 2023-07-06 15:15:13,429 (trainer:732) INFO: 18epoch:train:2601-2700batch: iter_time=1.057e-04, forward_time=0.108, loss_ctc=76.932, loss_att=63.507, acc=0.694, loss=67.535, backward_time=0.750, grad_norm=96.917, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.766e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 15:16:53,331 (trainer:732) INFO: 18epoch:train:2701-2800batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=62.411, loss_att=46.895, acc=0.715, loss=51.550, backward_time=0.752, grad_norm=92.496, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.764e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 15:18:33,060 (trainer:732) INFO: 18epoch:train:2801-2900batch: iter_time=9.940e-05, forward_time=0.108, loss_ctc=81.354, loss_att=57.151, acc=0.696, loss=64.412, backward_time=0.751, grad_norm=109.033, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.761e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:20:12,823 (trainer:732) INFO: 18epoch:train:2901-3000batch: iter_time=1.129e-04, forward_time=0.108, loss_ctc=73.741, loss_att=58.586, acc=0.679, loss=63.132, backward_time=0.752, grad_norm=91.478, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.758e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:21:52,679 (trainer:732) INFO: 18epoch:train:3001-3100batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=83.727, loss_att=65.874, acc=0.669, loss=71.230, backward_time=0.752, grad_norm=106.440, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.756e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 15:23:32,451 (trainer:732) INFO: 18epoch:train:3101-3200batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=67.147, loss_att=49.955, acc=0.689, loss=55.113, backward_time=0.751, grad_norm=107.665, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.753e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:25:14,937 (trainer:732) INFO: 18epoch:train:3201-3300batch: iter_time=9.253e-05, forward_time=0.108, loss_ctc=81.896, loss_att=61.574, acc=0.673, loss=67.671, backward_time=0.753, grad_norm=105.660, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.750e-05, train_time=2.049
+[gpua003:0/64] 2023-07-06 15:25:48,262 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 15:26:07,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 15:26:11,334 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 15:29:42,037 (trainer:732) INFO: 18epoch:train:3301-3400batch: iter_time=1.323, forward_time=0.110, loss_ctc=72.978, loss_att=56.716, acc=0.686, loss=61.595, backward_time=0.775, grad_norm=91.640, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.748e-05, train_time=5.342
+[gpua003:0/64] 2023-07-06 15:31:22,262 (trainer:732) INFO: 18epoch:train:3401-3500batch: iter_time=1.067e-04, forward_time=0.108, loss_ctc=72.957, loss_att=62.528, acc=0.685, loss=65.657, backward_time=0.753, grad_norm=88.960, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.745e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 15:33:02,184 (trainer:732) INFO: 18epoch:train:3501-3600batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=67.727, loss_att=54.922, acc=0.697, loss=58.763, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.742e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 15:34:41,830 (trainer:732) INFO: 18epoch:train:3601-3700batch: iter_time=1.085e-04, forward_time=0.108, loss_ctc=72.865, loss_att=53.783, acc=0.699, loss=59.507, backward_time=0.752, grad_norm=105.810, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.740e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 15:36:21,533 (trainer:732) INFO: 18epoch:train:3701-3800batch: iter_time=1.178e-04, forward_time=0.109, loss_ctc=78.415, loss_att=59.688, acc=0.681, loss=65.306, backward_time=0.752, grad_norm=94.163, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.737e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:38:01,291 (trainer:732) INFO: 18epoch:train:3801-3900batch: iter_time=1.163e-04, forward_time=0.109, loss_ctc=74.930, loss_att=57.824, acc=0.677, loss=62.956, backward_time=0.752, grad_norm=98.833, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.734e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:39:41,058 (trainer:732) INFO: 18epoch:train:3901-4000batch: iter_time=1.116e-04, forward_time=0.109, loss_ctc=73.796, loss_att=59.813, acc=0.674, loss=64.008, backward_time=0.753, grad_norm=105.954, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.732e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:41:20,935 (trainer:732) INFO: 18epoch:train:4001-4100batch: iter_time=9.829e-05, forward_time=0.109, loss_ctc=71.016, loss_att=54.671, acc=0.686, loss=59.574, backward_time=0.753, grad_norm=88.325, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.729e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 15:42:27,084 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 15:42:46,118 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 15:42:49,704 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 15:46:38,959 (trainer:732) INFO: 18epoch:train:4101-4200batch: iter_time=1.311, forward_time=0.109, loss_ctc=82.633, loss_att=57.106, acc=0.674, loss=64.764, backward_time=0.765, grad_norm=108.721, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.726e-05, train_time=6.360
+[gpua003:0/64] 2023-07-06 15:48:19,686 (trainer:732) INFO: 18epoch:train:4201-4300batch: iter_time=9.726e-05, forward_time=0.109, loss_ctc=74.521, loss_att=62.322, acc=0.689, loss=65.982, backward_time=0.757, grad_norm=96.759, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.724e-05, train_time=2.014
+[gpua003:0/64] 2023-07-06 15:49:59,357 (trainer:732) INFO: 18epoch:train:4301-4400batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=68.417, loss_att=54.908, acc=0.700, loss=58.961, backward_time=0.751, grad_norm=87.929, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.721e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 15:51:39,010 (trainer:732) INFO: 18epoch:train:4401-4500batch: iter_time=1.019e-04, forward_time=0.108, loss_ctc=72.963, loss_att=54.105, acc=0.704, loss=59.763, backward_time=0.750, grad_norm=121.911, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.718e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 15:53:18,767 (trainer:732) INFO: 18epoch:train:4501-4600batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=78.466, loss_att=59.773, acc=0.690, loss=65.381, backward_time=0.751, grad_norm=96.401, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.716e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 15:54:58,498 (trainer:732) INFO: 18epoch:train:4601-4700batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=76.432, loss_att=58.796, acc=0.677, loss=64.087, backward_time=0.752, grad_norm=97.092, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.713e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 15:56:38,047 (trainer:732) INFO: 18epoch:train:4701-4800batch: iter_time=9.499e-05, forward_time=0.108, loss_ctc=79.331, loss_att=63.852, acc=0.665, loss=68.495, backward_time=0.750, grad_norm=256.176, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.710e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 15:58:19,964 (trainer:732) INFO: 18epoch:train:4801-4900batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=68.325, loss_att=52.879, acc=0.682, loss=57.513, backward_time=0.751, grad_norm=118.723, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.708e-05, train_time=2.038
+[gpua003:0/64] 2023-07-06 16:00:00,347 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 16:00:19,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 16:00:23,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 16:03:54,812 (trainer:732) INFO: 18epoch:train:4901-5000batch: iter_time=1.281, forward_time=0.108, loss_ctc=83.769, loss_att=60.724, acc=0.667, loss=67.638, backward_time=0.761, grad_norm=148.559, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.705e-05, train_time=6.697
+[gpua003:0/64] 2023-07-06 16:05:37,501 (trainer:732) INFO: 18epoch:train:5001-5100batch: iter_time=9.821e-05, forward_time=0.108, loss_ctc=74.654, loss_att=62.672, acc=0.686, loss=66.267, backward_time=0.760, grad_norm=88.103, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.702e-05, train_time=2.054
+[gpua003:0/64] 2023-07-06 16:07:17,638 (trainer:732) INFO: 18epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=74.717, loss_att=61.342, acc=0.690, loss=65.354, backward_time=0.751, grad_norm=111.773, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.700e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 16:08:57,432 (trainer:732) INFO: 18epoch:train:5201-5300batch: iter_time=1.039e-04, forward_time=0.109, loss_ctc=62.277, loss_att=47.038, acc=0.711, loss=51.609, backward_time=0.752, grad_norm=77.470, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.697e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 16:10:37,101 (trainer:732) INFO: 18epoch:train:5301-5400batch: iter_time=1.087e-04, forward_time=0.108, loss_ctc=81.357, loss_att=56.958, acc=0.687, loss=64.278, backward_time=0.751, grad_norm=148.292, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.695e-05, train_time=1.993
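
The loss_scale column is the AMP dynamic loss scale, and its logged values are exact powers of two (3.603e+16 = 2**55, 7.206e+16 = 2**56, 1.441e+17 = 2**57, the jump visible in the 4001-4100 block above): the scaler doubles the scale after a run of overflow-free steps and halves it when scaled gradients overflow. A minimal sketch with PyTorch's GradScaler; the hyperparameters shown are assumed defaults, not values read from this run's config:

import torch

model = torch.nn.Linear(8, 8).cuda()  # assumes a CUDA device, as on these A100 nodes
optimizer = torch.optim.Adam(model.parameters())
scaler = torch.cuda.amp.GradScaler(growth_factor=2.0, backoff_factor=0.5, growth_interval=2000)

x = torch.randn(4, 8, device="cuda")
with torch.cuda.amp.autocast():
    loss = model(x).square().mean()
scaler.scale(loss).backward()  # backward runs on the scaled loss
scaler.step(optimizer)         # unscales first; skips the step on inf/nan gradients
scaler.update()                # doubles the scale after growth_interval clean steps
print(scaler.get_scale())
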
+[gpua003:0/64] 2023-07-06 16:12:16,859 (trainer:732) INFO: 18epoch:train:5401-5500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=73.380, loss_att=57.785, acc=0.682, loss=62.463, backward_time=0.751, grad_norm=91.302, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.692e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 16:13:56,539 (trainer:732) INFO: 18epoch:train:5501-5600batch: iter_time=1.039e-04, forward_time=0.108, loss_ctc=81.378, loss_att=65.579, acc=0.665, loss=70.319, backward_time=0.751, grad_norm=99.738, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.689e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 16:15:35,974 (trainer:732) INFO: 18epoch:train:5601-5700batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=65.790, loss_att=48.674, acc=0.695, loss=53.808, backward_time=0.749, grad_norm=111.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.687e-05, train_time=1.988
+[gpua003:0/64] 2023-07-06 16:17:15,561 (trainer:732) INFO: 18epoch:train:5701-5800batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=82.875, loss_att=60.270, acc=0.669, loss=67.051, backward_time=0.750, grad_norm=114.126, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.684e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 16:17:48,761 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-06 16:18:08,112 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 16:18:11,623 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 16:22:38,235 (trainer:732) INFO: 18epoch:train:5801-5900batch: iter_time=1.313, forward_time=0.109, loss_ctc=73.660, loss_att=59.347, acc=0.685, loss=63.641, backward_time=0.764, grad_norm=132.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.681e-05, train_time=6.453
+[gpua003:0/64] 2023-07-06 16:24:18,781 (trainer:732) INFO: 18epoch:train:5901-6000batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=76.119, loss_att=58.539, acc=0.690, loss=63.813, backward_time=0.754, grad_norm=88.467, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.679e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 16:26:06,138 (trainer:732) INFO: 18epoch:train:6001-6100batch: iter_time=9.305e-05, forward_time=0.109, loss_ctc=67.971, loss_att=52.451, acc=0.704, loss=57.107, backward_time=0.766, grad_norm=95.395, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.676e-05, train_time=2.147
+[gpua003:0/64] 2023-07-06 16:27:49,192 (trainer:732) INFO: 18epoch:train:6101-6200batch: iter_time=9.965e-05, forward_time=0.108, loss_ctc=77.390, loss_att=55.435, acc=0.689, loss=62.021, backward_time=0.756, grad_norm=106.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.674e-05, train_time=2.061
+[gpua003:0/64] 2023-07-06 16:29:29,408 (trainer:732) INFO: 18epoch:train:6201-6300batch: iter_time=9.552e-05, forward_time=0.109, loss_ctc=70.826, loss_att=56.281, acc=0.684, loss=60.644, backward_time=0.753, grad_norm=94.037, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.671e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 16:31:09,234 (trainer:732) INFO: 18epoch:train:6301-6400batch: iter_time=9.346e-05, forward_time=0.107, loss_ctc=78.001, loss_att=63.797, acc=0.672, loss=68.058, backward_time=0.751, grad_norm=98.998, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.668e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 16:32:48,924 (trainer:732) INFO: 18epoch:train:6401-6500batch: iter_time=9.280e-05, forward_time=0.108, loss_ctc=72.916, loss_att=53.940, acc=0.675, loss=59.633, backward_time=0.751, grad_norm=86.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.666e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 16:34:29,082 (trainer:732) INFO: 18epoch:train:6501-6600batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=74.326, loss_att=55.265, acc=0.677, loss=60.983, backward_time=0.752, grad_norm=94.716, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.663e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 16:35:38,397 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-06 16:35:57,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:36:01,099 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:36:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 16:36:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 16:41:32,144 (trainer:732) INFO: 18epoch:train:6601-6700batch: iter_time=1.282, forward_time=0.107, loss_ctc=83.492, loss_att=58.963, acc=0.681, loss=66.321, backward_time=0.770, grad_norm=107.633, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.661e-05, train_time=8.461
+[gpua003:0/64] 2023-07-06 16:43:17,374 (trainer:732) INFO: 18epoch:train:6701-6800batch: iter_time=9.000e-05, forward_time=0.107, loss_ctc=72.670, loss_att=61.072, acc=0.690, loss=64.551, backward_time=0.767, grad_norm=93.284, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.658e-05, train_time=2.104
+[gpua003:0/64] 2023-07-06 16:44:57,475 (trainer:732) INFO: 18epoch:train:6801-6900batch: iter_time=8.929e-05, forward_time=0.107, loss_ctc=68.519, loss_att=54.104, acc=0.697, loss=58.428, backward_time=0.751, grad_norm=89.012, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.655e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 16:46:44,014 (trainer:732) INFO: 18epoch:train:6901-7000batch: iter_time=8.633e-05, forward_time=0.107, loss_ctc=73.189, loss_att=52.360, acc=0.706, loss=58.609, backward_time=0.755, grad_norm=131.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.653e-05, train_time=2.131
+[gpua003:0/64] 2023-07-06 16:48:38,048 (trainer:732) INFO: 18epoch:train:7001-7100batch: iter_time=9.301e-05, forward_time=0.107, loss_ctc=76.911, loss_att=60.205, acc=0.681, loss=65.217, backward_time=0.775, grad_norm=91.195, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.650e-05, train_time=2.280
+[gpua003:0/64] 2023-07-06 16:50:17,837 (trainer:732) INFO: 18epoch:train:7101-7200batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=73.833, loss_att=56.759, acc=0.685, loss=61.881, backward_time=0.750, grad_norm=97.936, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.648e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 16:51:58,408 (trainer:732) INFO: 18epoch:train:7201-7300batch: iter_time=9.376e-05, forward_time=0.107, loss_ctc=76.637, loss_att=62.319, acc=0.667, loss=66.614, backward_time=0.752, grad_norm=113.058, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.645e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 16:54:05,563 (trainer:732) INFO: 18epoch:train:7301-7400batch: iter_time=1.831e-04, forward_time=0.124, loss_ctc=68.742, loss_att=52.246, acc=0.684, loss=57.195, backward_time=0.816, grad_norm=109.431, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.119, optim0_lr0=8.642e-05, train_time=2.543
+[gpua003:0/64] 2023-07-06 16:55:47,933 (trainer:732) INFO: 18epoch:train:7401-7500batch: iter_time=9.365e-05, forward_time=0.110, loss_ctc=82.184, loss_att=57.189, acc=0.675, loss=64.687, backward_time=0.760, grad_norm=142.708, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.640e-05, train_time=2.047
+[gpua003:0/64] 2023-07-06 16:55:54,106 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
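
grad_norm in these records is the global gradient norm and clip=100.000 appears to be the configured max-norm threshold, so a block like 7401-7500 above (grad_norm=142.708) had its gradients rescaled by roughly 100/142.7 before the optimizer step. The standard PyTorch operation behind this, with the threshold taken from the log and everything else illustrative:

import torch

model = torch.nn.Linear(16, 16)
loss = model(torch.randn(2, 16)).sum()
loss.backward()
# Returns the total norm computed over all parameter gradients (what the log
# reports as grad_norm) and rescales them in place only if it exceeds max_norm.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
print(float(total_norm))
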
+[gpua003:0/64] 2023-07-06 16:56:13,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 16:56:16,909 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 16:56:16,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 16:56:16,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 17:00:34,079 (trainer:732) INFO: 18epoch:train:7501-7600batch: iter_time=1.505, forward_time=0.108, loss_ctc=74.300, loss_att=63.152, acc=0.683, loss=66.497, backward_time=0.765, grad_norm=141.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.637e-05, train_time=5.723
+[gpua003:0/64] 2023-07-06 17:02:15,733 (trainer:732) INFO: 18epoch:train:7601-7700batch: iter_time=9.276e-05, forward_time=0.109, loss_ctc=75.341, loss_att=59.650, acc=0.691, loss=64.357, backward_time=0.757, grad_norm=131.218, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.635e-05, train_time=2.033
+[gpua003:0/64] 2023-07-06 17:04:00,532 (trainer:732) INFO: 18epoch:train:7701-7800batch: iter_time=9.513e-05, forward_time=0.108, loss_ctc=62.710, loss_att=47.592, acc=0.714, loss=52.128, backward_time=0.758, grad_norm=100.496, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.632e-05, train_time=2.096
+[gpua003:0/64] 2023-07-06 17:05:41,261 (trainer:732) INFO: 18epoch:train:7801-7900batch: iter_time=1.024e-04, forward_time=0.109, loss_ctc=80.778, loss_att=56.500, acc=0.691, loss=63.783, backward_time=0.753, grad_norm=159.514, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.630e-05, train_time=2.014
+[gpua003:0/64] 2023-07-06 17:07:23,370 (trainer:732) INFO: 18epoch:train:7901-8000batch: iter_time=9.871e-05, forward_time=0.109, loss_ctc=72.426, loss_att=57.186, acc=0.677, loss=61.758, backward_time=0.755, grad_norm=100.381, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.627e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 17:09:03,079 (trainer:732) INFO: 18epoch:train:8001-8100batch: iter_time=9.808e-05, forward_time=0.108, loss_ctc=81.683, loss_att=65.407, acc=0.673, loss=70.289, backward_time=0.751, grad_norm=103.619, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.624e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 17:10:54,113 (trainer:732) INFO: 18epoch:train:8101-8200batch: iter_time=1.053e-04, forward_time=0.108, loss_ctc=64.899, loss_att=47.680, acc=0.696, loss=52.846, backward_time=0.765, grad_norm=84.417, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.622e-05, train_time=2.220
+[gpua003:0/64] 2023-07-06 17:12:33,937 (trainer:732) INFO: 18epoch:train:8201-8300batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=80.088, loss_att=59.873, acc=0.669, loss=65.937, backward_time=0.751, grad_norm=140.365, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.619e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 17:13:08,280 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-06 17:13:27,649 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 17:13:31,153 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 17:18:59,187 (trainer:732) INFO: 18epoch:train:8301-8400batch: iter_time=1.294, forward_time=0.107, loss_ctc=72.616, loss_att=56.151, acc=0.688, loss=61.090, backward_time=0.763, grad_norm=159.886, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.617e-05, train_time=7.705
+[gpua003:0/64] 2023-07-06 17:20:40,408 (trainer:732) INFO: 18epoch:train:8401-8500batch: iter_time=1.002e-04, forward_time=0.108, loss_ctc=72.837, loss_att=60.069, acc=0.694, loss=63.899, backward_time=0.752, grad_norm=118.934, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.614e-05, train_time=2.024
+[gpua003:0/64] 2023-07-06 17:22:20,274 (trainer:732) INFO: 18epoch:train:8501-8600batch: iter_time=9.768e-05, forward_time=0.108, loss_ctc=68.421, loss_att=53.892, acc=0.698, loss=58.250, backward_time=0.752, grad_norm=96.100, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.612e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 17:24:14,320 (trainer:732) INFO: 18epoch:train:8601-8700batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=68.857, loss_att=51.084, acc=0.707, loss=56.416, backward_time=0.768, grad_norm=78.940, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.609e-05, train_time=2.281
+[gpua003:0/64] 2023-07-06 17:25:54,219 (trainer:732) INFO: 18epoch:train:8701-8800batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=79.583, loss_att=60.458, acc=0.683, loss=66.195, backward_time=0.752, grad_norm=99.339, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.607e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 17:27:34,084 (trainer:732) INFO: 18epoch:train:8801-8900batch: iter_time=9.920e-05, forward_time=0.108, loss_ctc=74.773, loss_att=57.128, acc=0.683, loss=62.421, backward_time=0.753, grad_norm=91.643, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.604e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 17:29:13,700 (trainer:732) INFO: 18epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=74.198, loss_att=58.596, acc=0.679, loss=63.277, backward_time=0.750, grad_norm=131.138, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.601e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 17:30:53,462 (trainer:732) INFO: 18epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=72.289, loss_att=54.298, acc=0.688, loss=59.695, backward_time=0.750, grad_norm=98.942, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.599e-05, train_time=1.995
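
optim0_lr0 decays slowly from 8.885e-05 toward 8.5e-05 across these blocks, which matches the inverse-square-root phase of the warmup schedule named in the experiment directory (lr2.5e-4, warmup10k). ESPnet's WarmupLR rule is sketched below; the exact optimizer-step count behind each logged value is not recoverable from the log (gradient accumulation changes the step/batch ratio), so this is qualitative only:

def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10_000) -> float:
    # Noam-style schedule: linear warmup for warmup_steps, then step**-0.5 decay.
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

# Deep into the decay phase the curve is locally flat, e.g. ~0.3% over 500 steps:
print(warmup_lr(80_000), warmup_lr(80_500))  # 8.839e-05 8.811e-05
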
+[gpua003:0/64] 2023-07-06 17:32:00,571 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 17:32:19,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 17:32:23,161 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 17:36:57,445 (trainer:732) INFO: 18epoch:train:9101-9200batch: iter_time=1.310, forward_time=0.109, loss_ctc=81.340, loss_att=56.298, acc=0.680, loss=63.811, backward_time=0.761, grad_norm=111.662, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.596e-05, train_time=7.279
+[gpua003:0/64] 2023-07-06 17:38:39,715 (trainer:732) INFO: 18epoch:train:9201-9300batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=73.285, loss_att=63.161, acc=0.686, loss=66.198, backward_time=0.757, grad_norm=88.279, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.594e-05, train_time=2.045
+[gpua003:0/64] 2023-07-06 17:40:23,113 (trainer:732) INFO: 18epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.109, loss_ctc=68.800, loss_att=55.601, acc=0.706, loss=59.561, backward_time=0.756, grad_norm=92.752, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.591e-05, train_time=2.068
+[gpua003:0/64] 2023-07-06 17:42:05,313 (trainer:732) INFO: 18epoch:train:9401-9500batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.284, loss_att=52.694, acc=0.710, loss=58.571, backward_time=0.754, grad_norm=98.040, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.589e-05, train_time=2.044
+[gpua003:0/64] 2023-07-06 17:43:45,031 (trainer:732) INFO: 18epoch:train:9501-9600batch: iter_time=1.102e-04, forward_time=0.109, loss_ctc=76.407, loss_att=59.152, acc=0.693, loss=64.328, backward_time=0.751, grad_norm=111.122, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.586e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 17:45:24,576 (trainer:732) INFO: 18epoch:train:9601-9700batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=74.718, loss_att=58.743, acc=0.682, loss=63.535, backward_time=0.750, grad_norm=102.896, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.584e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 17:47:08,153 (trainer:732) INFO: 18epoch:train:9701-9800batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=76.130, loss_att=62.394, acc=0.672, loss=66.515, backward_time=0.752, grad_norm=99.445, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.581e-05, train_time=2.071
+[gpua003:0/64] 2023-07-06 17:48:48,960 (trainer:732) INFO: 18epoch:train:9801-9900batch: iter_time=9.027e-05, forward_time=0.108, loss_ctc=67.693, loss_att=51.247, acc=0.689, loss=56.181, backward_time=0.758, grad_norm=101.173, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.579e-05, train_time=2.016
+[gpua003:0/64] 2023-07-06 17:50:28,839 (trainer:732) INFO: 18epoch:train:9901-10000batch: iter_time=8.743e-05, forward_time=0.109, loss_ctc=81.308, loss_att=58.352, acc=0.679, loss=65.239, backward_time=0.752, grad_norm=97.815, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.576e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 18:02:52,400 (trainer:338) INFO: 18epoch results: [train] iter_time=0.198, forward_time=0.110, loss_ctc=75.020, loss_att=57.840, acc=0.686, loss=62.994, backward_time=0.757, grad_norm=106.540, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.705e-05, train_time=2.652, time=3 hours, 41 minutes and 15.98 seconds, total_count=150000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.528, cer_ctc=0.288, loss_att=42.295, acc=0.657, cer=0.376, wer=0.987, loss=44.765, time=5 minutes and 47.09 seconds, total_count=15686, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 21.52 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-06 18:03:10,995 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-06 18:03:11,099 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/13epoch.pth
+[gpua003:0/64] 2023-07-06 18:03:11,142 (trainer:272) INFO: 19/100epoch started. Estimated time to finish: 1 week, 6 days and 5 hours
+[gpua003:0/64] 2023-07-06 18:03:12,483 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
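
The "Estimated time to finish" line can be sanity-checked from the epoch-18 results above: one epoch took about 3 h 41 m of training plus roughly 6 m of validation and 6 m of attention plotting, and 82 of 100 epochs remain. Assuming the trainer simply extrapolates mean epoch wall time (the actual averaging ESPnet uses may differ), the arithmetic lands close to the logged 1 week, 6 days and 5 hours:

from datetime import timedelta

# Epoch-18 wall times from the results line above (rounded to whole seconds).
train = timedelta(hours=3, minutes=41, seconds=16)
valid = timedelta(minutes=5, seconds=47)
att_plot = timedelta(minutes=6, seconds=22)

remaining_epochs = 100 - 18
eta = remaining_epochs * (train + valid + att_plot)
print(eta)  # 13 days, 7:00:10 -- i.e. about 1 week and 6 days, matching the log
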
+[gpua003:0/64] 2023-07-06 18:03:31,414 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 18:03:36,157 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 18:10:33,322 (trainer:732) INFO: 19epoch:train:1-100batch: iter_time=3.360, forward_time=0.134, loss_ctc=71.878, loss_att=52.487, acc=0.687, loss=58.305, backward_time=0.768, grad_norm=98.210, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.115, optim0_lr0=8.574e-05, train_time=8.830
+[gpua003:0/64] 2023-07-06 18:12:13,736 (trainer:732) INFO: 19epoch:train:101-200batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.951, loss_att=54.611, acc=0.685, loss=60.413, backward_time=0.752, grad_norm=97.472, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.571e-05, train_time=2.008
+[gpua003:0/64] 2023-07-06 18:13:54,017 (trainer:732) INFO: 19epoch:train:201-300batch: iter_time=9.643e-05, forward_time=0.108, loss_ctc=71.382, loss_att=53.617, acc=0.688, loss=58.947, backward_time=0.750, grad_norm=79.712, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.569e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 18:15:36,625 (trainer:732) INFO: 19epoch:train:301-400batch: iter_time=9.724e-05, forward_time=0.108, loss_ctc=75.527, loss_att=61.014, acc=0.673, loss=65.368, backward_time=0.757, grad_norm=94.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.566e-05, train_time=2.052
+[gpua003:0/64] 2023-07-06 18:17:17,089 (trainer:732) INFO: 19epoch:train:401-500batch: iter_time=9.887e-05, forward_time=0.109, loss_ctc=73.023, loss_att=58.274, acc=0.684, loss=62.699, backward_time=0.751, grad_norm=90.553, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.564e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 18:19:01,828 (trainer:732) INFO: 19epoch:train:501-600batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=71.310, loss_att=57.750, acc=0.697, loss=61.818, backward_time=0.756, grad_norm=100.266, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.561e-05, train_time=2.095
+[gpua003:0/64] 2023-07-06 18:21:04,708 (trainer:732) INFO: 19epoch:train:601-700batch: iter_time=9.789e-05, forward_time=0.110, loss_ctc=68.405, loss_att=52.910, acc=0.711, loss=57.558, backward_time=0.796, grad_norm=82.647, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.559e-05, train_time=2.457
+[gpua003:0/64] 2023-07-06 18:22:58,725 (trainer:732) INFO: 19epoch:train:701-800batch: iter_time=1.057e-04, forward_time=0.111, loss_ctc=85.051, loss_att=69.937, acc=0.676, loss=74.471, backward_time=0.799, grad_norm=123.148, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.556e-05, train_time=2.280
+[gpua003:0/64] 2023-07-06 18:23:39,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 18:23:57,976 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 18:24:01,679 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 18:27:57,894 (trainer:732) INFO: 19epoch:train:801-900batch: iter_time=1.349, forward_time=0.109, loss_ctc=74.949, loss_att=56.631, acc=0.679, loss=62.126, backward_time=0.766, grad_norm=85.017, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.553e-05, train_time=5.983
+[gpua003:0/64] 2023-07-06 18:29:38,263 (trainer:732) INFO: 19epoch:train:901-1000batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=74.503, loss_att=54.158, acc=0.674, loss=60.262, backward_time=0.751, grad_norm=102.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.551e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 18:31:18,016 (trainer:732) INFO: 19epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.108, loss_ctc=74.767, loss_att=56.937, acc=0.684, loss=62.286, backward_time=0.752, grad_norm=82.548, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.548e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 18:32:57,536 (trainer:732) INFO: 19epoch:train:1101-1200batch: iter_time=1.159e-04, forward_time=0.107, loss_ctc=70.675, loss_att=55.035, acc=0.670, loss=59.727, backward_time=0.752, grad_norm=94.405, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.546e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 18:34:37,089 (trainer:732) INFO: 19epoch:train:1201-1300batch: iter_time=1.106e-04, forward_time=0.107, loss_ctc=76.324, loss_att=59.642, acc=0.686, loss=64.647, backward_time=0.751, grad_norm=93.883, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.544e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 18:36:17,028 (trainer:732) INFO: 19epoch:train:1301-1400batch: iter_time=9.751e-05, forward_time=0.109, loss_ctc=67.885, loss_att=54.884, acc=0.685, loss=58.784, backward_time=0.753, grad_norm=91.475, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.541e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 18:37:56,460 (trainer:732) INFO: 19epoch:train:1401-1500batch: iter_time=1.118e-04, forward_time=0.107, loss_ctc=66.717, loss_att=53.410, acc=0.694, loss=57.402, backward_time=0.750, grad_norm=80.970, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.539e-05, train_time=1.988
+[gpua003:0/64] 2023-07-06 18:39:36,031 (trainer:732) INFO: 19epoch:train:1501-1600batch: iter_time=8.864e-05, forward_time=0.108, loss_ctc=81.647, loss_att=67.344, acc=0.685, loss=71.635, backward_time=0.750, grad_norm=114.722, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.536e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 18:40:44,261 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 18:41:03,688 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 18:41:07,482 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 18:45:41,292 (trainer:732) INFO: 19epoch:train:1601-1700batch: iter_time=1.309, forward_time=0.108, loss_ctc=77.277, loss_att=60.983, acc=0.668, loss=65.871, backward_time=0.768, grad_norm=93.748, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.534e-05, train_time=7.305
+[gpua003:0/64] 2023-07-06 18:47:21,491 (trainer:732) INFO: 19epoch:train:1701-1800batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=73.968, loss_att=51.681, acc=0.683, loss=58.367, backward_time=0.753, grad_norm=97.133, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.531e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 18:49:01,195 (trainer:732) INFO: 19epoch:train:1801-1900batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=73.852, loss_att=56.822, acc=0.683, loss=61.931, backward_time=0.751, grad_norm=95.195, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.529e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 18:50:40,963 (trainer:732) INFO: 19epoch:train:1901-2000batch: iter_time=1.119e-04, forward_time=0.109, loss_ctc=68.398, loss_att=50.868, acc=0.690, loss=56.127, backward_time=0.752, grad_norm=82.918, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.526e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 18:52:20,494 (trainer:732) INFO: 19epoch:train:2001-2100batch: iter_time=1.182e-04, forward_time=0.108, loss_ctc=72.453, loss_att=55.434, acc=0.682, loss=60.540, backward_time=0.750, grad_norm=91.971, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.524e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 18:54:00,169 (trainer:732) INFO: 19epoch:train:2101-2200batch: iter_time=1.126e-04, forward_time=0.109, loss_ctc=70.471, loss_att=60.656, acc=0.681, loss=63.600, backward_time=0.752, grad_norm=92.734, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.521e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 18:55:43,499 (trainer:732) INFO: 19epoch:train:2201-2300batch: iter_time=1.184e-04, forward_time=0.109, loss_ctc=70.887, loss_att=55.798, acc=0.691, loss=60.325, backward_time=0.753, grad_norm=87.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.519e-05, train_time=2.066
+[gpua003:0/64] 2023-07-06 18:57:28,468 (trainer:732) INFO: 19epoch:train:2301-2400batch: iter_time=1.142e-04, forward_time=0.108, loss_ctc=74.302, loss_att=62.084, acc=0.687, loss=65.750, backward_time=0.757, grad_norm=90.024, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.516e-05, train_time=2.099
+[gpua003:0/64] 2023-07-06 18:59:09,045 (trainer:732) INFO: 19epoch:train:2401-2500batch: iter_time=9.995e-05, forward_time=0.109, loss_ctc=83.902, loss_att=62.709, acc=0.676, loss=69.067, backward_time=0.757, grad_norm=139.737, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.514e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 18:59:11,328 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 18:59:30,666 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 18:59:34,436 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 19:05:02,322 (trainer:732) INFO: 19epoch:train:2501-2600batch: iter_time=1.326, forward_time=0.108, loss_ctc=70.364, loss_att=51.755, acc=0.683, loss=57.338, backward_time=0.793, grad_norm=99.243, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.511e-05, train_time=7.065
+[gpua003:0/64] 2023-07-06 19:06:42,318 (trainer:732) INFO: 19epoch:train:2601-2700batch: iter_time=1.063e-04, forward_time=0.108, loss_ctc=73.763, loss_att=53.729, acc=0.683, loss=59.739, backward_time=0.752, grad_norm=99.427, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.509e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 19:08:21,986 (trainer:732) INFO: 19epoch:train:2701-2800batch: iter_time=9.496e-05, forward_time=0.107, loss_ctc=69.928, loss_att=54.179, acc=0.687, loss=58.904, backward_time=0.752, grad_norm=87.933, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.506e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 19:10:01,778 (trainer:732) INFO: 19epoch:train:2801-2900batch: iter_time=9.847e-05, forward_time=0.108, loss_ctc=70.824, loss_att=55.649, acc=0.683, loss=60.201, backward_time=0.753, grad_norm=104.518, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.504e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 19:11:43,478 (trainer:732) INFO: 19epoch:train:2901-3000batch: iter_time=8.790e-05, forward_time=0.108, loss_ctc=71.744, loss_att=56.572, acc=0.683, loss=61.124, backward_time=0.753, grad_norm=97.646, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.501e-05, train_time=2.034
+[gpua003:0/64] 2023-07-06 19:13:23,321 (trainer:732) INFO: 19epoch:train:3001-3100batch: iter_time=8.518e-05, forward_time=0.108, loss_ctc=68.969, loss_att=56.135, acc=0.697, loss=59.985, backward_time=0.752, grad_norm=93.645, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.499e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 19:15:02,845 (trainer:732) INFO: 19epoch:train:3101-3200batch: iter_time=9.418e-05, forward_time=0.107, loss_ctc=66.684, loss_att=49.410, acc=0.712, loss=54.592, backward_time=0.751, grad_norm=86.075, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.496e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 19:16:42,465 (trainer:732) INFO: 19epoch:train:3201-3300batch: iter_time=1.012e-04, forward_time=0.108, loss_ctc=83.199, loss_att=70.634, acc=0.670, loss=74.404, backward_time=0.751, grad_norm=111.083, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.494e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 19:17:18,787 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 19:17:38,120 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 19:17:41,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 19:23:47,034 (trainer:732) INFO: 19epoch:train:3301-3400batch: iter_time=3.115, forward_time=0.165, loss_ctc=73.481, loss_att=53.362, acc=0.685, loss=59.397, backward_time=0.768, grad_norm=89.020, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.115, optim0_lr0=8.492e-05, train_time=8.491
+[gpua003:0/64] 2023-07-06 19:25:27,393 (trainer:732) INFO: 19epoch:train:3401-3500batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=74.841, loss_att=53.947, acc=0.678, loss=60.215, backward_time=0.753, grad_norm=92.488, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.489e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 19:27:17,900 (trainer:732) INFO: 19epoch:train:3501-3600batch: iter_time=8.651e-05, forward_time=0.108, loss_ctc=73.019, loss_att=56.463, acc=0.684, loss=61.429, backward_time=0.762, grad_norm=97.312, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.487e-05, train_time=2.210
+[gpua003:0/64] 2023-07-06 19:28:57,771 (trainer:732) INFO: 19epoch:train:3601-3700batch: iter_time=9.262e-05, forward_time=0.108, loss_ctc=70.917, loss_att=54.082, acc=0.678, loss=59.133, backward_time=0.753, grad_norm=99.327, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.484e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 19:30:37,491 (trainer:732) INFO: 19epoch:train:3701-3800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.384, loss_att=57.937, acc=0.690, loss=62.571, backward_time=0.750, grad_norm=93.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.482e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 19:32:17,208 (trainer:732) INFO: 19epoch:train:3801-3900batch: iter_time=1.142e-04,
forward_time=0.108, loss_ctc=67.881, loss_att=55.385, acc=0.682, loss=59.133, backward_time=0.751, grad_norm=104.375, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.479e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:33:56,953 (trainer:732) INFO: 19epoch:train:3901-4000batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=66.750, loss_att=52.965, acc=0.698, loss=57.101, backward_time=0.752, grad_norm=101.994, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.477e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 19:35:36,807 (trainer:732) INFO: 19epoch:train:4001-4100batch: iter_time=9.853e-05, forward_time=0.108, loss_ctc=78.586, loss_att=65.422, acc=0.693, loss=69.371, backward_time=0.753, grad_norm=90.671, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.475e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:36:46,521 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 19:37:05,501 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 19:37:09,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:42:01,856 (trainer:732) INFO: 19epoch:train:4101-4200batch: iter_time=1.271, forward_time=0.108, loss_ctc=77.470, loss_att=59.834, acc=0.674, loss=65.125, backward_time=0.764, grad_norm=104.225, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.472e-05, train_time=7.701 +[gpua003:0/64] 2023-07-06 19:43:42,839 (trainer:732) INFO: 19epoch:train:4201-4300batch: iter_time=9.875e-05, forward_time=0.108, loss_ctc=72.007, loss_att=52.479, acc=0.697, loss=58.337, backward_time=0.754, grad_norm=104.085, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.470e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 19:45:25,413 (trainer:732) INFO: 19epoch:train:4301-4400batch: iter_time=8.662e-05, forward_time=0.108, loss_ctc=75.337, loss_att=58.219, acc=0.688, loss=63.354, backward_time=0.759, grad_norm=99.286, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.467e-05, train_time=2.051 +[gpua003:0/64] 2023-07-06 19:47:05,267 (trainer:732) INFO: 19epoch:train:4401-4500batch: iter_time=1.128e-04, forward_time=0.108, loss_ctc=68.687, loss_att=50.921, acc=0.699, loss=56.251, backward_time=0.752, grad_norm=83.793, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.465e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:48:45,031 (trainer:732) INFO: 19epoch:train:4501-4600batch: iter_time=1.138e-04, forward_time=0.109, loss_ctc=66.702, loss_att=53.486, acc=0.690, loss=57.451, backward_time=0.751, grad_norm=87.055, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.462e-05, 
train_time=1.995 +[gpua003:0/64] 2023-07-06 19:50:28,601 (trainer:732) INFO: 19epoch:train:4601-4700batch: iter_time=1.079e-04, forward_time=0.108, loss_ctc=72.286, loss_att=61.308, acc=0.686, loss=64.601, backward_time=0.756, grad_norm=99.764, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.460e-05, train_time=2.071 +[gpua003:0/64] 2023-07-06 19:52:08,198 (trainer:732) INFO: 19epoch:train:4701-4800batch: iter_time=1.036e-04, forward_time=0.108, loss_ctc=71.867, loss_att=54.172, acc=0.705, loss=59.481, backward_time=0.750, grad_norm=109.482, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.458e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:53:47,911 (trainer:732) INFO: 19epoch:train:4801-4900batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=73.498, loss_att=60.646, acc=0.710, loss=64.501, backward_time=0.751, grad_norm=132.193, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.455e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:55:27,532 (trainer:732) INFO: 19epoch:train:4901-5000batch: iter_time=1.088e-04, forward_time=0.108, loss_ctc=81.536, loss_att=64.509, acc=0.679, loss=69.617, backward_time=0.751, grad_norm=111.664, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.453e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:55:30,048 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 19:55:48,920 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 19:55:52,433 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:00:40,776 (trainer:732) INFO: 19epoch:train:5001-5100batch: iter_time=1.366, forward_time=0.108, loss_ctc=69.849, loss_att=51.729, acc=0.695, loss=57.165, backward_time=0.763, grad_norm=87.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.450e-05, train_time=6.265 +[gpua003:0/64] 2023-07-06 20:02:20,937 (trainer:732) INFO: 19epoch:train:5101-5200batch: iter_time=9.291e-05, forward_time=0.108, loss_ctc=70.810, loss_att=51.826, acc=0.701, loss=57.521, backward_time=0.752, grad_norm=89.983, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.448e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 20:04:03,241 (trainer:732) INFO: 19epoch:train:5201-5300batch: iter_time=9.771e-05, forward_time=0.109, loss_ctc=70.882, loss_att=52.510, acc=0.700, loss=58.021, backward_time=0.755, grad_norm=78.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.445e-05, train_time=2.046 +[gpua003:0/64] 2023-07-06 20:05:51,446 (trainer:732) INFO: 19epoch:train:5301-5400batch: iter_time=9.570e-05, forward_time=0.107, loss_ctc=69.962, loss_att=57.223, acc=0.687, loss=61.045, 
backward_time=0.760, grad_norm=91.273, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.443e-05, train_time=2.164 +[gpua003:0/64] 2023-07-06 20:07:31,682 (trainer:732) INFO: 19epoch:train:5401-5500batch: iter_time=9.179e-05, forward_time=0.107, loss_ctc=70.911, loss_att=55.955, acc=0.689, loss=60.442, backward_time=0.751, grad_norm=91.242, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.441e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 20:09:13,295 (trainer:732) INFO: 19epoch:train:5501-5600batch: iter_time=9.333e-05, forward_time=0.107, loss_ctc=72.418, loss_att=59.104, acc=0.701, loss=63.098, backward_time=0.754, grad_norm=97.947, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.438e-05, train_time=2.032 +[gpua003:0/64] 2023-07-06 20:10:53,158 (trainer:732) INFO: 19epoch:train:5601-5700batch: iter_time=9.882e-05, forward_time=0.107, loss_ctc=66.916, loss_att=50.106, acc=0.718, loss=55.149, backward_time=0.751, grad_norm=81.033, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.436e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 20:12:34,924 (trainer:732) INFO: 19epoch:train:5701-5800batch: iter_time=9.385e-05, forward_time=0.108, loss_ctc=82.933, loss_att=68.684, acc=0.688, loss=72.959, backward_time=0.756, grad_norm=95.394, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.433e-05, train_time=2.035 +[gpua003:0/64] 2023-07-06 20:13:11,362 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 20:13:30,240 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 20:13:33,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:17:25,315 (trainer:732) INFO: 19epoch:train:5801-5900batch: iter_time=1.332, forward_time=0.109, loss_ctc=72.517, loss_att=53.535, acc=0.694, loss=59.230, backward_time=0.764, grad_norm=91.357, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.431e-05, train_time=5.808 +[gpua003:0/64] 2023-07-06 20:19:05,549 (trainer:732) INFO: 19epoch:train:5901-6000batch: iter_time=9.979e-05, forward_time=0.108, loss_ctc=72.968, loss_att=53.312, acc=0.689, loss=59.209, backward_time=0.753, grad_norm=105.818, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.429e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:20:45,322 (trainer:732) INFO: 19epoch:train:6001-6100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=72.912, loss_att=54.448, acc=0.697, loss=59.987, backward_time=0.751, grad_norm=86.049, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.426e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:22:41,206 (trainer:732) INFO: 
19epoch:train:6101-6200batch: iter_time=9.789e-05, forward_time=0.115, loss_ctc=67.667, loss_att=52.369, acc=0.694, loss=56.958, backward_time=0.773, grad_norm=79.432, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.424e-05, train_time=2.317 +[gpua003:0/64] 2023-07-06 20:24:21,811 (trainer:732) INFO: 19epoch:train:6201-6300batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=72.569, loss_att=58.922, acc=0.692, loss=63.016, backward_time=0.756, grad_norm=114.121, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.421e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 20:26:02,019 (trainer:732) INFO: 19epoch:train:6301-6400batch: iter_time=1.042e-04, forward_time=0.110, loss_ctc=67.317, loss_att=54.258, acc=0.696, loss=58.176, backward_time=0.753, grad_norm=86.928, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.419e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:27:41,637 (trainer:732) INFO: 19epoch:train:6401-6500batch: iter_time=1.161e-04, forward_time=0.108, loss_ctc=66.587, loss_att=54.003, acc=0.706, loss=57.778, backward_time=0.750, grad_norm=93.696, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.417e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 20:29:21,369 (trainer:732) INFO: 19epoch:train:6501-6600batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=78.539, loss_att=62.535, acc=0.711, loss=67.336, backward_time=0.751, grad_norm=87.737, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.414e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:30:33,131 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 20:30:52,251 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 20:30:55,820 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:35:42,565 (trainer:732) INFO: 19epoch:train:6601-6700batch: iter_time=1.437, forward_time=0.110, loss_ctc=76.173, loss_att=59.992, acc=0.683, loss=64.847, backward_time=0.771, grad_norm=92.415, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.412e-05, train_time=7.624 +[gpua003:0/64] 2023-07-06 20:37:35,033 (trainer:732) INFO: 19epoch:train:6701-6800batch: iter_time=2.117e-04, forward_time=0.111, loss_ctc=69.258, loss_att=50.177, acc=0.703, loss=55.902, backward_time=0.765, grad_norm=87.979, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.410e-05, train_time=2.249 +[gpua003:0/64] 2023-07-06 20:39:33,155 (trainer:732) INFO: 19epoch:train:6801-6900batch: iter_time=8.933e-05, forward_time=0.110, loss_ctc=76.772, loss_att=57.481, acc=0.692, loss=63.268, backward_time=0.809, grad_norm=113.312, clip=100.000, loss_scale=1.153e+18, 
optim_step_time=0.113, optim0_lr0=8.407e-05, train_time=2.362 +[gpua003:0/64] 2023-07-06 20:41:18,407 (trainer:732) INFO: 19epoch:train:6901-7000batch: iter_time=8.764e-05, forward_time=0.107, loss_ctc=69.838, loss_att=51.828, acc=0.699, loss=57.231, backward_time=0.770, grad_norm=80.826, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.405e-05, train_time=2.105 +[gpua003:0/64] 2023-07-06 20:43:01,811 (trainer:732) INFO: 19epoch:train:7001-7100batch: iter_time=8.895e-05, forward_time=0.108, loss_ctc=65.767, loss_att=52.168, acc=0.697, loss=56.247, backward_time=0.757, grad_norm=105.272, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.402e-05, train_time=2.068 +[gpua003:0/64] 2023-07-06 20:44:41,509 (trainer:732) INFO: 19epoch:train:7101-7200batch: iter_time=9.980e-05, forward_time=0.108, loss_ctc=71.601, loss_att=60.582, acc=0.689, loss=63.888, backward_time=0.751, grad_norm=87.586, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.400e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:46:21,090 (trainer:732) INFO: 19epoch:train:7201-7300batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=70.650, loss_att=53.644, acc=0.707, loss=58.745, backward_time=0.750, grad_norm=87.476, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.398e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 20:48:02,044 (trainer:732) INFO: 19epoch:train:7301-7400batch: iter_time=8.952e-05, forward_time=0.108, loss_ctc=72.244, loss_att=60.016, acc=0.710, loss=63.685, backward_time=0.752, grad_norm=87.451, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.395e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 20:49:41,828 (trainer:732) INFO: 19epoch:train:7401-7500batch: iter_time=8.351e-05, forward_time=0.108, loss_ctc=78.571, loss_att=61.691, acc=0.689, loss=66.755, backward_time=0.751, grad_norm=104.440, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.393e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:49:52,973 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
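# Editor's note on the loss columns: every progress line above reports loss_ctc,
# loss_att, and a combined loss. The combined value is consistent with a hybrid
# CTC/attention objective, loss = w*loss_ctc + (1-w)*loss_att with w = 0.3. The
# authoritative ctc_weight lives in the YAML passed via --config, which is not
# reproduced in this log, so w = 0.3 is an inference from the numbers, not a
# quoted setting. A minimal, self-contained Python check against two entries
# copied from the log above:
pairs = [
    # (loss_ctc, loss_att, loss) copied verbatim from entries above
    (74.949, 56.631, 62.126),  # 19epoch:train:801-900batch
    (78.571, 61.691, 66.755),  # 19epoch:train:7401-7500batch
]
w = 0.3  # assumed CTC weight, inferred from the reported numbers
for ctc, att, total in pairs:
    combined = w * ctc + (1.0 - w) * att
    assert abs(combined - total) < 5e-3, (combined, total)
print("loss == 0.3*loss_ctc + 0.7*loss_att for the sampled entries")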
+[gpua003:0/64] 2023-07-06 20:50:12,449 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 20:50:15,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:54:12,385 (trainer:732) INFO: 19epoch:train:7501-7600batch: iter_time=1.571, forward_time=0.130, loss_ctc=72.069, loss_att=54.503, acc=0.682, loss=59.773, backward_time=0.763, grad_norm=94.845, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.391e-05, train_time=5.411 +[gpua003:0/64] 2023-07-06 20:55:52,369 (trainer:732) INFO: 19epoch:train:7601-7700batch: iter_time=9.053e-05, forward_time=0.107, loss_ctc=69.256, loss_att=49.020, acc=0.700, loss=55.091, backward_time=0.751, grad_norm=86.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.388e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 20:57:32,499 (trainer:732) INFO: 19epoch:train:7701-7800batch: iter_time=8.971e-05, forward_time=0.107, loss_ctc=71.084, loss_att=55.278, acc=0.687, loss=60.020, backward_time=0.750, grad_norm=85.779, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.386e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 20:59:12,329 (trainer:732) INFO: 19epoch:train:7801-7900batch: iter_time=1.121e-04, forward_time=0.109, loss_ctc=69.918, loss_att=55.616, acc=0.691, loss=59.906, backward_time=0.752, grad_norm=90.907, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.383e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:00:52,131 (trainer:732) INFO: 19epoch:train:7901-8000batch: iter_time=1.091e-04, forward_time=0.110, loss_ctc=72.031, loss_att=58.828, acc=0.687, loss=62.788, backward_time=0.752, grad_norm=90.990, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.381e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:02:31,776 (trainer:732) INFO: 19epoch:train:8001-8100batch: iter_time=1.210e-04, forward_time=0.109, loss_ctc=69.895, loss_att=59.175, acc=0.679, loss=62.391, backward_time=0.751, grad_norm=96.311, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.379e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:04:11,333 (trainer:732) INFO: 19epoch:train:8101-8200batch: iter_time=1.196e-04, forward_time=0.109, loss_ctc=65.075, loss_att=46.957, acc=0.713, loss=52.393, backward_time=0.752, grad_norm=79.989, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.376e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 21:05:50,952 (trainer:732) INFO: 19epoch:train:8201-8300batch: iter_time=1.100e-04, forward_time=0.109, loss_ctc=83.512, loss_att=69.549, acc=0.682, loss=73.738, backward_time=0.752, grad_norm=92.984, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.374e-05, 
train_time=1.992 +[gpua003:0/64] 2023-07-06 21:06:26,463 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 21:06:45,859 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 21:06:49,645 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:10:21,900 (trainer:732) INFO: 19epoch:train:8301-8400batch: iter_time=1.289, forward_time=0.109, loss_ctc=73.988, loss_att=57.572, acc=0.681, loss=62.497, backward_time=0.774, grad_norm=92.924, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.372e-05, train_time=5.419 +[gpua003:0/64] 2023-07-06 21:12:03,025 (trainer:732) INFO: 19epoch:train:8401-8500batch: iter_time=9.593e-05, forward_time=0.108, loss_ctc=71.684, loss_att=50.782, acc=0.691, loss=57.053, backward_time=0.753, grad_norm=91.974, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.369e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 21:13:42,791 (trainer:732) INFO: 19epoch:train:8501-8600batch: iter_time=8.646e-05, forward_time=0.109, loss_ctc=74.130, loss_att=56.299, acc=0.692, loss=61.648, backward_time=0.753, grad_norm=87.929, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.367e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 21:15:23,522 (trainer:732) INFO: 19epoch:train:8601-8700batch: iter_time=9.629e-05, forward_time=0.109, loss_ctc=68.342, loss_att=52.472, acc=0.685, loss=57.233, backward_time=0.753, grad_norm=89.721, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.365e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 21:17:03,161 (trainer:732) INFO: 19epoch:train:8701-8800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=72.982, loss_att=57.376, acc=0.697, loss=62.058, backward_time=0.751, grad_norm=113.686, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.362e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:18:43,104 (trainer:732) INFO: 19epoch:train:8801-8900batch: iter_time=9.576e-05, forward_time=0.109, loss_ctc=66.077, loss_att=53.057, acc=0.696, loss=56.963, backward_time=0.753, grad_norm=81.859, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.360e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 21:20:22,718 (trainer:732) INFO: 19epoch:train:8901-9000batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=65.904, loss_att=53.143, acc=0.700, loss=56.972, backward_time=0.751, grad_norm=84.308, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.358e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 21:22:02,446 (trainer:732) INFO: 19epoch:train:9001-9100batch: iter_time=9.656e-05, forward_time=0.108, loss_ctc=77.892, loss_att=65.356, acc=0.694, loss=69.117, 
backward_time=0.752, grad_norm=101.891, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.355e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 21:23:10,807 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 21:23:29,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 21:23:33,345 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:27:24,475 (trainer:732) INFO: 19epoch:train:9101-9200batch: iter_time=1.327, forward_time=0.109, loss_ctc=75.034, loss_att=57.612, acc=0.680, loss=62.839, backward_time=0.762, grad_norm=88.253, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.353e-05, train_time=6.440 +[gpua003:0/64] 2023-07-06 21:29:05,161 (trainer:732) INFO: 19epoch:train:9201-9300batch: iter_time=9.736e-05, forward_time=0.108, loss_ctc=71.238, loss_att=51.142, acc=0.705, loss=57.170, backward_time=0.754, grad_norm=81.302, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.351e-05, train_time=2.013 +[gpua003:0/64] 2023-07-06 21:30:46,142 (trainer:732) INFO: 19epoch:train:9301-9400batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=74.369, loss_att=57.598, acc=0.690, loss=62.630, backward_time=0.752, grad_norm=101.798, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.348e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 21:32:26,233 (trainer:732) INFO: 19epoch:train:9401-9500batch: iter_time=9.370e-05, forward_time=0.109, loss_ctc=67.835, loss_att=50.281, acc=0.706, loss=55.547, backward_time=0.752, grad_norm=88.722, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.346e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 21:34:06,640 (trainer:732) INFO: 19epoch:train:9501-9600batch: iter_time=1.055e-04, forward_time=0.110, loss_ctc=64.836, loss_att=52.472, acc=0.696, loss=56.181, backward_time=0.753, grad_norm=90.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.344e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 21:36:03,018 (trainer:732) INFO: 19epoch:train:9601-9700batch: iter_time=7.040e-04, forward_time=0.150, loss_ctc=70.591, loss_att=59.315, acc=0.695, loss=62.698, backward_time=0.779, grad_norm=92.334, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.341e-05, train_time=2.327 +[gpua003:0/64] 2023-07-06 21:37:44,980 (trainer:732) INFO: 19epoch:train:9701-9800batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.523, loss_att=55.550, acc=0.707, loss=59.741, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.339e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 21:39:24,859 (trainer:732) INFO: 
19epoch:train:9801-9900batch: iter_time=1.053e-04, forward_time=0.110, loss_ctc=73.187, loss_att=59.236, acc=0.717, loss=63.421, backward_time=0.753, grad_norm=85.059, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.337e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 21:41:04,508 (trainer:732) INFO: 19epoch:train:9901-10000batch: iter_time=9.273e-05, forward_time=0.108, loss_ctc=79.916, loss_att=59.698, acc=0.692, loss=65.764, backward_time=0.751, grad_norm=87.679, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.334e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:53:26,999 (trainer:338) INFO: 19epoch results: [train] iter_time=0.201, forward_time=0.110, loss_ctc=72.442, loss_att=56.449, acc=0.691, loss=61.247, backward_time=0.757, grad_norm=94.422, clip=100.000, loss_scale=7.494e+17, optim_step_time=0.113, optim0_lr0=8.452e-05, train_time=2.614, time=3 hours, 38 minutes and 13.18 seconds, total_count=160000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.578, cer_ctc=0.291, loss_att=43.395, acc=0.637, cer=0.442, wer=1.000, loss=45.550, time=5 minutes and 44.01 seconds, total_count=16698, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 18.47 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-06 21:53:45,891 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-06 21:53:45,932 (trainer:272) INFO: 20/100epoch started. Estimated time to finish: 1 week, 6 days and 1 hour +[gpua003:0/64] 2023-07-06 21:53:46,828 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua003:0/64] 2023-07-06 21:54:05,845 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 21:54:11,034 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:01:50,792 (trainer:732) INFO: 20epoch:train:1-100batch: iter_time=3.782, forward_time=0.136, loss_ctc=71.854, loss_att=51.948, acc=0.710, loss=57.919, backward_time=0.767, grad_norm=91.560, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.332e-05, train_time=9.690 +[gpua003:0/64] 2023-07-06 22:03:30,984 (trainer:732) INFO: 20epoch:train:101-200batch: iter_time=9.746e-05, forward_time=0.108, loss_ctc=68.269, loss_att=50.073, acc=0.698, loss=55.532, backward_time=0.752, grad_norm=89.704, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.330e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 22:05:13,770 (trainer:732) INFO: 20epoch:train:201-300batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=76.182, loss_att=54.086, acc=0.680, loss=60.715, backward_time=0.754, grad_norm=92.398, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.327e-05, 
train_time=2.056 +[gpua003:0/64] 2023-07-06 22:06:54,574 (trainer:732) INFO: 20epoch:train:301-400batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=72.772, loss_att=56.293, acc=0.685, loss=61.237, backward_time=0.751, grad_norm=105.066, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.325e-05, train_time=2.016 +[gpua003:0/64] 2023-07-06 22:08:34,689 (trainer:732) INFO: 20epoch:train:401-500batch: iter_time=1.021e-04, forward_time=0.106, loss_ctc=75.522, loss_att=55.332, acc=0.684, loss=61.389, backward_time=0.749, grad_norm=97.945, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.323e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 22:10:14,287 (trainer:732) INFO: 20epoch:train:501-600batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=73.512, loss_att=56.609, acc=0.702, loss=61.680, backward_time=0.750, grad_norm=84.685, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.321e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 22:11:54,452 (trainer:732) INFO: 20epoch:train:601-700batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=78.698, loss_att=54.562, acc=0.686, loss=61.803, backward_time=0.750, grad_norm=98.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.318e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 22:13:40,823 (trainer:732) INFO: 20epoch:train:701-800batch: iter_time=1.096e-04, forward_time=0.108, loss_ctc=89.847, loss_att=64.589, acc=0.689, loss=72.167, backward_time=0.762, grad_norm=100.675, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.316e-05, train_time=2.127 +[gpua003:0/64] 2023-07-06 22:14:20,761 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 22:14:39,405 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 22:14:43,125 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:18:32,284 (trainer:732) INFO: 20epoch:train:801-900batch: iter_time=1.362, forward_time=0.108, loss_ctc=71.198, loss_att=53.976, acc=0.705, loss=59.142, backward_time=0.768, grad_norm=80.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.314e-05, train_time=5.829 +[gpua003:0/64] 2023-07-06 22:20:13,201 (trainer:732) INFO: 20epoch:train:901-1000batch: iter_time=9.604e-05, forward_time=0.108, loss_ctc=67.658, loss_att=48.534, acc=0.705, loss=54.271, backward_time=0.755, grad_norm=86.115, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.311e-05, train_time=2.018 +[gpua003:0/64] 2023-07-06 22:21:53,069 (trainer:732) INFO: 20epoch:train:1001-1100batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=75.100, loss_att=54.061, acc=0.692, loss=60.372, 
backward_time=0.753, grad_norm=94.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.309e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 22:23:34,205 (trainer:732) INFO: 20epoch:train:1101-1200batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=75.994, loss_att=54.925, acc=0.686, loss=61.246, backward_time=0.754, grad_norm=94.166, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.307e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 22:25:14,385 (trainer:732) INFO: 20epoch:train:1201-1300batch: iter_time=9.688e-05, forward_time=0.108, loss_ctc=68.867, loss_att=52.161, acc=0.694, loss=57.173, backward_time=0.752, grad_norm=88.133, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.304e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 22:26:54,236 (trainer:732) INFO: 20epoch:train:1301-1400batch: iter_time=9.936e-05, forward_time=0.108, loss_ctc=74.280, loss_att=59.054, acc=0.691, loss=63.622, backward_time=0.753, grad_norm=81.828, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.302e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 22:28:34,189 (trainer:732) INFO: 20epoch:train:1401-1500batch: iter_time=9.366e-05, forward_time=0.108, loss_ctc=72.329, loss_att=51.203, acc=0.701, loss=57.541, backward_time=0.752, grad_norm=88.414, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.300e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 22:30:15,525 (trainer:732) INFO: 20epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=91.703, loss_att=61.724, acc=0.683, loss=70.718, backward_time=0.754, grad_norm=106.713, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.298e-05, train_time=2.026 +[gpua003:0/64] 2023-07-06 22:31:22,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
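# Editor's note: the progress lines follow a fixed "key=value, key=value, ..."
# layout, so they can be post-processed without any ESPnet code. Below is a
# minimal, hypothetical parsing sketch; the sample line is copied verbatim
# (minus the leading diff '+') from the 20epoch:train:1501-1600batch entry
# above, and the regex is an assumption about this layout, not an ESPnet API.
import re

SAMPLE = ("[gpua003:0/64] 2023-07-06 22:30:15,525 (trainer:732) INFO: "
          "20epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.108, "
          "loss_ctc=91.703, loss_att=61.724, acc=0.683, loss=70.718, "
          "backward_time=0.754, grad_norm=106.713, clip=100.000, "
          "loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.298e-05, "
          "train_time=2.026")

def parse_progress(line: str) -> dict:
    # Collect key=value pairs; values may use scientific notation (e.g. 2.306e+18).
    return {k: float(v) for k, v in re.findall(r"(\w+)=([\d.eE+-]+)", line)}

metrics = parse_progress(SAMPLE)
assert metrics["loss_ctc"] == 91.703 and metrics["grad_norm"] == 106.713
# One such dict per line is enough to tabulate or plot loss/acc across an epoch.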
+[gpua003:0/64] 2023-07-06 22:31:42,368 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 22:31:46,190 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:35:07,262 (trainer:732) INFO: 20epoch:train:1601-1700batch: iter_time=1.291, forward_time=0.108, loss_ctc=71.960, loss_att=56.558, acc=0.711, loss=61.178, backward_time=0.764, grad_norm=84.868, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.295e-05, train_time=5.835 +[gpua003:0/64] 2023-07-06 22:36:48,148 (trainer:732) INFO: 20epoch:train:1701-1800batch: iter_time=1.106e-04, forward_time=0.109, loss_ctc=70.844, loss_att=53.568, acc=0.697, loss=58.751, backward_time=0.755, grad_norm=109.667, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.293e-05, train_time=2.017 +[gpua003:0/64] 2023-07-06 22:38:27,849 (trainer:732) INFO: 20epoch:train:1801-1900batch: iter_time=9.001e-05, forward_time=0.108, loss_ctc=70.234, loss_att=49.729, acc=0.708, loss=55.881, backward_time=0.752, grad_norm=104.621, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.291e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 22:40:07,934 (trainer:732) INFO: 20epoch:train:1901-2000batch: iter_time=9.188e-05, forward_time=0.109, loss_ctc=74.679, loss_att=53.534, acc=0.678, loss=59.877, backward_time=0.753, grad_norm=99.994, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.288e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 22:41:47,719 (trainer:732) INFO: 20epoch:train:2001-2100batch: iter_time=9.469e-05, forward_time=0.109, loss_ctc=72.452, loss_att=57.103, acc=0.685, loss=61.708, backward_time=0.753, grad_norm=88.374, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.286e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 22:43:27,515 (trainer:732) INFO: 20epoch:train:2101-2200batch: iter_time=9.559e-05, forward_time=0.108, loss_ctc=72.276, loss_att=54.565, acc=0.690, loss=59.878, backward_time=0.753, grad_norm=85.377, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.284e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 22:45:07,265 (trainer:732) INFO: 20epoch:train:2201-2300batch: iter_time=1.133e-04, forward_time=0.109, loss_ctc=70.952, loss_att=53.601, acc=0.697, loss=58.807, backward_time=0.752, grad_norm=91.829, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.282e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 22:46:46,964 (trainer:732) INFO: 20epoch:train:2301-2400batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=82.613, loss_att=58.490, acc=0.677, loss=65.727, backward_time=0.751, grad_norm=108.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.279e-05, 
train_time=1.994 +[gpua003:0/64] 2023-07-06 22:48:27,085 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 22:48:46,472 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 22:48:50,362 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 22:53:34,506 (trainer:732) INFO: 20epoch:train:2401-2500batch: iter_time=2.844, forward_time=0.130, loss_ctc=80.783, loss_att=60.279, acc=0.694, loss=66.430, backward_time=0.756, grad_norm=95.454, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.115, optim0_lr0=8.277e-05, train_time=8.151 +[gpua003:0/64] 2023-07-06 22:55:17,165 (trainer:732) INFO: 20epoch:train:2501-2600batch: iter_time=9.896e-05, forward_time=0.113, loss_ctc=70.675, loss_att=49.400, acc=0.713, loss=55.783, backward_time=0.761, grad_norm=88.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.275e-05, train_time=2.053 +[gpua003:0/64] 2023-07-06 22:56:57,735 (trainer:732) INFO: 20epoch:train:2601-2700batch: iter_time=9.556e-05, forward_time=0.108, loss_ctc=74.837, loss_att=54.096, acc=0.700, loss=60.318, backward_time=0.756, grad_norm=95.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.273e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 22:58:38,514 (trainer:732) INFO: 20epoch:train:2701-2800batch: iter_time=9.973e-05, forward_time=0.107, loss_ctc=75.135, loss_att=51.859, acc=0.679, loss=58.842, backward_time=0.752, grad_norm=88.078, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.270e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 23:00:20,949 (trainer:732) INFO: 20epoch:train:2801-2900batch: iter_time=9.541e-05, forward_time=0.108, loss_ctc=69.255, loss_att=55.518, acc=0.700, loss=59.639, backward_time=0.755, grad_norm=80.641, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.268e-05, train_time=2.048 +[gpua003:0/64] 2023-07-06 23:02:14,547 (trainer:732) INFO: 20epoch:train:2901-3000batch: iter_time=9.836e-05, forward_time=0.109, loss_ctc=74.405, loss_att=53.787, acc=0.692, loss=59.972, backward_time=0.782, grad_norm=90.228, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.266e-05, train_time=2.272 +[gpua003:0/64] 2023-07-06 23:03:54,862 (trainer:732) INFO: 20epoch:train:3001-3100batch: iter_time=9.682e-05, forward_time=0.109, loss_ctc=68.569, loss_att=51.720, acc=0.703, loss=56.774, backward_time=0.753, grad_norm=80.182, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.264e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 23:05:36,742 (trainer:732) INFO: 20epoch:train:3101-3200batch: iter_time=9.416e-05, forward_time=0.110, loss_ctc=81.585, loss_att=57.636, acc=0.695, 
loss=64.820, backward_time=0.756, grad_norm=114.022, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.261e-05, train_time=2.037 +[gpua003:0/64] 2023-07-06 23:07:16,980 (trainer:732) INFO: 20epoch:train:3201-3300batch: iter_time=9.267e-05, forward_time=0.109, loss_ctc=83.278, loss_att=62.301, acc=0.690, loss=68.594, backward_time=0.752, grad_norm=102.383, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.259e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 23:07:51,559 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 23:08:11,129 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 23:08:14,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 23:13:03,956 (trainer:732) INFO: 20epoch:train:3301-3400batch: iter_time=1.301, forward_time=0.109, loss_ctc=78.861, loss_att=56.676, acc=0.700, loss=63.331, backward_time=0.769, grad_norm=114.958, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.257e-05, train_time=6.939 +[gpua003:0/64] 2023-07-06 23:14:44,012 (trainer:732) INFO: 20epoch:train:3401-3500batch: iter_time=9.664e-05, forward_time=0.108, loss_ctc=68.025, loss_att=50.075, acc=0.699, loss=55.460, backward_time=0.753, grad_norm=84.489, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.255e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 23:16:27,830 (trainer:732) INFO: 20epoch:train:3501-3600batch: iter_time=9.904e-05, forward_time=0.108, loss_ctc=74.312, loss_att=52.930, acc=0.693, loss=59.345, backward_time=0.765, grad_norm=93.179, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.252e-05, train_time=2.076 +[gpua003:0/64] 2023-07-06 23:18:08,117 (trainer:732) INFO: 20epoch:train:3601-3700batch: iter_time=1.068e-04, forward_time=0.110, loss_ctc=73.489, loss_att=54.029, acc=0.682, loss=59.867, backward_time=0.753, grad_norm=96.418, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.250e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 23:19:47,962 (trainer:732) INFO: 20epoch:train:3701-3800batch: iter_time=9.700e-05, forward_time=0.107, loss_ctc=67.104, loss_att=51.126, acc=0.699, loss=55.919, backward_time=0.751, grad_norm=85.122, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.248e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 23:21:27,624 (trainer:732) INFO: 20epoch:train:3801-3900batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=73.938, loss_att=60.096, acc=0.686, loss=64.249, backward_time=0.751, grad_norm=89.373, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.246e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 23:23:07,236 (trainer:732) 
INFO: 20epoch:train:3901-4000batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.519, loss_att=51.367, acc=0.692, loss=57.713, backward_time=0.752, grad_norm=97.338, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.243e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 23:24:46,969 (trainer:732) INFO: 20epoch:train:4001-4100batch: iter_time=9.761e-05, forward_time=0.108, loss_ctc=88.906, loss_att=60.730, acc=0.683, loss=69.183, backward_time=0.751, grad_norm=111.352, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.241e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 23:25:53,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 23:26:13,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 23:26:16,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 23:29:48,281 (trainer:732) INFO: 20epoch:train:4101-4200batch: iter_time=1.285, forward_time=0.108, loss_ctc=71.787, loss_att=56.750, acc=0.703, loss=61.261, backward_time=0.763, grad_norm=89.955, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.239e-05, train_time=6.026 +[gpua003:0/64] 2023-07-06 23:31:29,587 (trainer:732) INFO: 20epoch:train:4201-4300batch: iter_time=1.041e-04, forward_time=0.109, loss_ctc=70.768, loss_att=51.030, acc=0.713, loss=56.951, backward_time=0.758, grad_norm=86.739, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.237e-05, train_time=2.026 +[gpua003:0/64] 2023-07-06 23:33:09,537 (trainer:732) INFO: 20epoch:train:4301-4400batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=69.008, loss_att=48.198, acc=0.717, loss=54.441, backward_time=0.752, grad_norm=98.169, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.234e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 23:34:49,543 (trainer:732) INFO: 20epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.108, loss_ctc=73.392, loss_att=51.724, acc=0.689, loss=58.225, backward_time=0.753, grad_norm=99.811, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.232e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 23:36:29,633 (trainer:732) INFO: 20epoch:train:4501-4600batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=71.304, loss_att=56.277, acc=0.699, loss=60.785, backward_time=0.753, grad_norm=84.933, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.230e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 23:38:09,605 (trainer:732) INFO: 20epoch:train:4601-4700batch: iter_time=1.208e-04, forward_time=0.108, loss_ctc=70.498, loss_att=54.270, acc=0.697, loss=59.139, backward_time=0.752, grad_norm=83.394, clip=100.000, 
loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.228e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 23:39:49,275 (trainer:732) INFO: 20epoch:train:4701-4800batch: iter_time=1.190e-04, forward_time=0.107, loss_ctc=71.120, loss_att=52.173, acc=0.708, loss=57.857, backward_time=0.750, grad_norm=96.519, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.225e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 23:41:28,953 (trainer:732) INFO: 20epoch:train:4801-4900batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=81.570, loss_att=58.452, acc=0.689, loss=65.387, backward_time=0.750, grad_norm=95.091, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.223e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 23:43:09,201 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 23:43:28,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 23:43:31,806 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 23:46:22,528 (trainer:732) INFO: 20epoch:train:4901-5000batch: iter_time=1.282, forward_time=0.108, loss_ctc=79.944, loss_att=59.404, acc=0.708, loss=65.566, backward_time=0.755, grad_norm=106.026, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=5.871 +[gpua003:0/64] 2023-07-06 23:48:04,124 (trainer:732) INFO: 20epoch:train:5001-5100batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=73.293, loss_att=52.693, acc=0.711, loss=58.873, backward_time=0.757, grad_norm=83.888, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.219e-05, train_time=2.032 +[gpua003:0/64] 2023-07-06 23:49:44,402 (trainer:732) INFO: 20epoch:train:5101-5200batch: iter_time=1.044e-04, forward_time=0.106, loss_ctc=67.920, loss_att=49.443, acc=0.700, loss=54.986, backward_time=0.751, grad_norm=92.376, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.217e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 23:51:24,233 (trainer:732) INFO: 20epoch:train:5201-5300batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=77.875, loss_att=55.129, acc=0.680, loss=61.953, backward_time=0.751, grad_norm=110.700, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.214e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 23:53:04,201 (trainer:732) INFO: 20epoch:train:5301-5400batch: iter_time=1.111e-04, forward_time=0.107, loss_ctc=66.903, loss_att=50.769, acc=0.700, loss=55.609, backward_time=0.751, grad_norm=80.652, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.212e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 23:54:50,887 (trainer:732) INFO: 20epoch:train:5401-5500batch: iter_time=1.171e-04, 
forward_time=0.107, loss_ctc=73.965, loss_att=54.060, acc=0.695, loss=60.031, backward_time=0.764, grad_norm=92.000, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.210e-05, train_time=2.133 +[gpua003:0/64] 2023-07-06 23:56:33,008 (trainer:732) INFO: 20epoch:train:5501-5600batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=72.161, loss_att=57.079, acc=0.703, loss=61.603, backward_time=0.754, grad_norm=88.705, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.208e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 23:58:12,856 (trainer:732) INFO: 20epoch:train:5601-5700batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=76.027, loss_att=53.040, acc=0.686, loss=59.936, backward_time=0.753, grad_norm=101.954, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.205e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 23:59:52,835 (trainer:732) INFO: 20epoch:train:5701-5800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=83.943, loss_att=63.565, acc=0.695, loss=69.678, backward_time=0.753, grad_norm=152.716, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.114, optim0_lr0=8.203e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 00:00:26,092 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 00:00:45,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 00:00:48,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:05:13,212 (trainer:732) INFO: 20epoch:train:5801-5900batch: iter_time=1.287, forward_time=0.108, loss_ctc=69.493, loss_att=50.272, acc=0.713, loss=56.039, backward_time=0.767, grad_norm=90.186, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.201e-05, train_time=6.407 +[gpua003:0/64] 2023-07-07 00:06:53,937 (trainer:732) INFO: 20epoch:train:5901-6000batch: iter_time=9.985e-05, forward_time=0.107, loss_ctc=67.619, loss_att=48.654, acc=0.710, loss=54.344, backward_time=0.755, grad_norm=80.945, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.199e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 00:08:34,265 (trainer:732) INFO: 20epoch:train:6001-6100batch: iter_time=9.513e-05, forward_time=0.107, loss_ctc=72.368, loss_att=52.436, acc=0.699, loss=58.415, backward_time=0.754, grad_norm=99.544, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.197e-05, train_time=2.006 +[gpua003:0/64] 2023-07-07 00:10:14,094 (trainer:732) INFO: 20epoch:train:6101-6200batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=72.573, loss_att=53.094, acc=0.693, loss=58.938, backward_time=0.752, grad_norm=96.528, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.194e-05, 
train_time=1.996 +[gpua003:0/64] 2023-07-07 00:11:54,093 (trainer:732) INFO: 20epoch:train:6201-6300batch: iter_time=9.448e-05, forward_time=0.108, loss_ctc=67.484, loss_att=50.808, acc=0.704, loss=55.811, backward_time=0.753, grad_norm=108.855, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.192e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 00:13:34,349 (trainer:732) INFO: 20epoch:train:6301-6400batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=72.464, loss_att=56.785, acc=0.702, loss=61.489, backward_time=0.753, grad_norm=87.129, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.190e-05, train_time=2.005 +[gpua003:0/64] 2023-07-07 00:15:14,312 (trainer:732) INFO: 20epoch:train:6401-6500batch: iter_time=9.419e-05, forward_time=0.108, loss_ctc=71.280, loss_att=50.161, acc=0.709, loss=56.497, backward_time=0.754, grad_norm=98.013, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.188e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 00:16:54,147 (trainer:732) INFO: 20epoch:train:6501-6600batch: iter_time=1.010e-04, forward_time=0.107, loss_ctc=90.171, loss_att=63.851, acc=0.685, loss=71.747, backward_time=0.753, grad_norm=108.463, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.186e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 00:18:00,433 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 00:18:19,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 00:18:23,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:22:45,396 (trainer:732) INFO: 20epoch:train:6601-6700batch: iter_time=1.296, forward_time=0.108, loss_ctc=74.059, loss_att=56.550, acc=0.707, loss=61.803, backward_time=0.761, grad_norm=91.757, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.183e-05, train_time=7.025 +[gpua003:0/64] 2023-07-07 00:24:26,019 (trainer:732) INFO: 20epoch:train:6701-6800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.347, loss_att=50.947, acc=0.699, loss=56.467, backward_time=0.755, grad_norm=89.501, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.181e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 00:26:06,137 (trainer:732) INFO: 20epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.107, loss_ctc=70.173, loss_att=49.974, acc=0.698, loss=56.034, backward_time=0.751, grad_norm=91.982, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.179e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 00:27:45,899 (trainer:732) INFO: 20epoch:train:6901-7000batch: iter_time=1.212e-04, forward_time=0.108, loss_ctc=71.799, loss_att=52.784, acc=0.684, 
loss=58.488, backward_time=0.752, grad_norm=105.648, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.177e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 00:29:25,719 (trainer:732) INFO: 20epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=69.357, loss_att=53.933, acc=0.692, loss=58.560, backward_time=0.752, grad_norm=90.499, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.175e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 00:31:05,462 (trainer:732) INFO: 20epoch:train:7101-7200batch: iter_time=1.086e-04, forward_time=0.108, loss_ctc=68.020, loss_att=53.832, acc=0.699, loss=58.088, backward_time=0.752, grad_norm=108.903, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.173e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 00:32:45,164 (trainer:732) INFO: 20epoch:train:7201-7300batch: iter_time=1.177e-04, forward_time=0.108, loss_ctc=71.562, loss_att=52.820, acc=0.698, loss=58.443, backward_time=0.753, grad_norm=92.492, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.170e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 00:34:25,792 (trainer:732) INFO: 20epoch:train:7301-7400batch: iter_time=1.149e-04, forward_time=0.108, loss_ctc=84.374, loss_att=58.985, acc=0.686, loss=66.602, backward_time=0.752, grad_norm=103.222, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.168e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 00:36:05,793 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua003:0/64] 2023-07-07 00:36:24,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 00:36:28,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:39:39,875 (trainer:732) INFO: 20epoch:train:7401-7500batch: iter_time=1.300, forward_time=0.108, loss_ctc=82.541, loss_att=62.936, acc=0.691, loss=68.817, backward_time=0.758, grad_norm=115.515, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.166e-05, train_time=6.281 +[gpua003:0/64] 2023-07-07 00:41:23,429 (trainer:732) INFO: 20epoch:train:7501-7600batch: iter_time=9.637e-05, forward_time=0.109, loss_ctc=69.225, loss_att=48.998, acc=0.720, loss=55.066, backward_time=0.763, grad_norm=96.997, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.164e-05, train_time=2.071 +[gpua003:0/64] 2023-07-07 00:43:03,727 (trainer:732) INFO: 20epoch:train:7601-7700batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=74.248, loss_att=54.779, acc=0.704, loss=60.620, backward_time=0.752, grad_norm=92.841, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.162e-05, train_time=2.006 +[gpua003:0/64] 2023-07-07 00:44:44,287 (trainer:732) 
INFO: 20epoch:train:7701-7800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.724, loss_att=51.407, acc=0.683, loss=58.102, backward_time=0.753, grad_norm=97.431, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.159e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 00:46:24,185 (trainer:732) INFO: 20epoch:train:7801-7900batch: iter_time=9.605e-05, forward_time=0.108, loss_ctc=69.983, loss_att=56.359, acc=0.700, loss=60.446, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.157e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 00:48:04,263 (trainer:732) INFO: 20epoch:train:7901-8000batch: iter_time=9.780e-05, forward_time=0.109, loss_ctc=71.676, loss_att=53.364, acc=0.698, loss=58.857, backward_time=0.754, grad_norm=93.587, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.155e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 00:49:45,962 (trainer:732) INFO: 20epoch:train:8001-8100batch: iter_time=9.342e-05, forward_time=0.108, loss_ctc=67.152, loss_att=50.735, acc=0.707, loss=55.660, backward_time=0.754, grad_norm=96.944, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.153e-05, train_time=2.034 +[gpua003:0/64] 2023-07-07 00:51:26,537 (trainer:732) INFO: 20epoch:train:8101-8200batch: iter_time=9.864e-05, forward_time=0.108, loss_ctc=81.176, loss_att=58.498, acc=0.696, loss=65.302, backward_time=0.753, grad_norm=108.921, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.151e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 00:53:07,235 (trainer:732) INFO: 20epoch:train:8201-8300batch: iter_time=1.006e-04, forward_time=0.108, loss_ctc=79.556, loss_att=61.957, acc=0.691, loss=67.237, backward_time=0.752, grad_norm=109.467, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.149e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 00:53:41,518 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
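Note on the loss columns in these progress lines: the reported loss is consistent with a hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3 (for example, the 7701-7800batch entry above: 0.3 * 73.724 + 0.7 * 51.407 = 58.102). A minimal sketch of that bookkeeping, assuming w = 0.3 -- the weight is inferred from the logged values, since the training YAML itself is not reproduced in this log:

# Minimal sketch: reproduce the "loss" column from "loss_ctc" and "loss_att".
# ctc_weight = 0.3 is an assumption inferred from the logged values above,
# not read from conf/train_s2t_transformer_*.yaml.
ctc_weight = 0.3

def combined_loss(loss_ctc: float, loss_att: float, w: float = ctc_weight) -> float:
    # Hybrid CTC/attention objective: weighted sum of the two branch losses.
    return w * loss_ctc + (1.0 - w) * loss_att

# Checks against two entries logged above (agreement to the 3 printed decimals):
assert abs(combined_loss(73.724, 51.407) - 58.102) < 5e-3
assert abs(combined_loss(74.248, 54.779) - 60.620) < 5e-3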
+[gpua003:0/64] 2023-07-07 00:54:00,863 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-07 00:54:04,686 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 00:58:25,770 (trainer:732) INFO: 20epoch:train:8301-8400batch: iter_time=1.273, forward_time=0.108, loss_ctc=78.310, loss_att=60.461, acc=0.698, loss=65.816, backward_time=0.767, grad_norm=117.559, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.146e-05, train_time=6.370 +[gpua003:0/64] 2023-07-07 01:00:07,312 (trainer:732) INFO: 20epoch:train:8401-8500batch: iter_time=1.152e-04, forward_time=0.108, loss_ctc=67.962, loss_att=49.896, acc=0.703, loss=55.316, backward_time=0.754, grad_norm=90.877, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.144e-05, train_time=2.031 +[gpua003:0/64] 2023-07-07 01:01:48,019 (trainer:732) INFO: 20epoch:train:8501-8600batch: iter_time=1.031e-04, forward_time=0.110, loss_ctc=68.584, loss_att=49.176, acc=0.698, loss=54.998, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.142e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 01:03:36,281 (trainer:732) INFO: 20epoch:train:8601-8700batch: iter_time=9.241e-05, forward_time=0.108, loss_ctc=75.435, loss_att=53.797, acc=0.684, loss=60.288, backward_time=0.759, grad_norm=99.257, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.140e-05, train_time=2.165 +[gpua003:0/64] 2023-07-07 01:05:22,596 (trainer:732) INFO: 20epoch:train:8701-8800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.540, loss_att=53.032, acc=0.705, loss=57.984, backward_time=0.759, grad_norm=88.332, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.138e-05, train_time=2.126 +[gpua003:0/64] 2023-07-07 01:07:02,503 (trainer:732) INFO: 20epoch:train:8801-8900batch: iter_time=1.033e-04, forward_time=0.108, loss_ctc=71.565, loss_att=57.189, acc=0.691, loss=61.502, backward_time=0.751, grad_norm=90.049, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.136e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 01:08:47,374 (trainer:732) INFO: 20epoch:train:8901-9000batch: iter_time=9.529e-05, forward_time=0.108, loss_ctc=74.747, loss_att=50.418, acc=0.695, loss=57.717, backward_time=0.757, grad_norm=98.679, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.134e-05, train_time=2.097 +[gpua003:0/64] 2023-07-07 01:10:27,736 (trainer:732) INFO: 20epoch:train:9001-9100batch: iter_time=9.856e-05, forward_time=0.107, loss_ctc=83.548, loss_att=63.248, acc=0.685, loss=69.338, backward_time=0.751, grad_norm=111.462, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.131e-05, 
train_time=2.007 +[gpua003:0/64] 2023-07-07 01:11:37,029 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-07 01:11:56,346 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-07 01:12:00,141 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 01:16:34,776 (trainer:732) INFO: 20epoch:train:9101-9200batch: iter_time=1.308, forward_time=0.108, loss_ctc=72.185, loss_att=56.405, acc=0.695, loss=61.139, backward_time=0.773, grad_norm=115.543, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.129e-05, train_time=7.341 +[gpua003:0/64] 2023-07-07 01:18:15,925 (trainer:732) INFO: 20epoch:train:9201-9300batch: iter_time=9.792e-05, forward_time=0.106, loss_ctc=71.745, loss_att=52.813, acc=0.704, loss=58.492, backward_time=0.754, grad_norm=89.778, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.127e-05, train_time=2.023 +[gpua003:0/64] 2023-07-07 01:19:59,197 (trainer:732) INFO: 20epoch:train:9301-9400batch: iter_time=9.737e-05, forward_time=0.107, loss_ctc=69.353, loss_att=48.395, acc=0.713, loss=54.682, backward_time=0.753, grad_norm=88.881, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.125e-05, train_time=2.065 +[gpua003:0/64] 2023-07-07 01:21:40,857 (trainer:732) INFO: 20epoch:train:9401-9500batch: iter_time=1.001e-04, forward_time=0.107, loss_ctc=72.030, loss_att=51.469, acc=0.684, loss=57.637, backward_time=0.753, grad_norm=97.051, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.123e-05, train_time=2.033 +[gpua003:0/64] 2023-07-07 01:23:21,684 (trainer:732) INFO: 20epoch:train:9501-9600batch: iter_time=9.299e-05, forward_time=0.107, loss_ctc=71.994, loss_att=56.267, acc=0.690, loss=60.985, backward_time=0.753, grad_norm=91.397, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.121e-05, train_time=2.016 +[gpua003:0/64] 2023-07-07 01:25:01,632 (trainer:732) INFO: 20epoch:train:9601-9700batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=70.203, loss_att=53.311, acc=0.696, loss=58.379, backward_time=0.753, grad_norm=104.136, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.118e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 01:26:41,518 (trainer:732) INFO: 20epoch:train:9701-9800batch: iter_time=8.494e-05, forward_time=0.107, loss_ctc=70.139, loss_att=51.677, acc=0.703, loss=57.216, backward_time=0.753, grad_norm=87.042, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.116e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 01:28:24,627 (trainer:732) INFO: 20epoch:train:9801-9900batch: iter_time=9.340e-05, forward_time=0.107, loss_ctc=82.680, loss_att=58.302, acc=0.679, loss=65.616, 
backward_time=0.763, grad_norm=95.875, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.114e-05, train_time=2.062 +[gpua003:0/64] 2023-07-07 01:30:09,616 (trainer:732) INFO: 20epoch:train:9901-10000batch: iter_time=9.389e-05, forward_time=0.107, loss_ctc=77.669, loss_att=57.909, acc=0.701, loss=63.837, backward_time=0.757, grad_norm=107.681, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.112e-05, train_time=2.100 +[gpua003:0/64] 2023-07-07 01:42:21,427 (trainer:338) INFO: 20epoch results: [train] iter_time=0.196, forward_time=0.108, loss_ctc=73.961, loss_att=54.526, acc=0.696, loss=60.357, backward_time=0.755, grad_norm=96.055, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=2.596, time=3 hours, 36 minutes and 32.76 seconds, total_count=170000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=49.387, cer_ctc=0.283, loss_att=42.558, acc=0.643, cer=0.424, wer=0.999, loss=44.607, time=5 minutes and 50.92 seconds, total_count=17710, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 11.77 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-07 01:42:37,043 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-07 01:42:37,073 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till20epoch.pth +[gpua003:0/64] 2023-07-07 01:43:32,066 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till20epoch.pth +[gpua003:0/64] 2023-07-07 01:43:57,407 (trainer:272) INFO: 21/100epoch started. Estimated time to finish: 1 week, 5 days and 21 hours +[gpua003:0/64] 2023-07-07 01:43:58,913 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
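The two "Averaging 5best models" lines above produce valid.acc.ave_5best.till20epoch.pth and valid.total_count.ave_5best.till20epoch.pth by averaging the parameters of the five best checkpoints under each criterion. A minimal sketch of that operation, assuming plain PyTorch state dicts (illustrative only, not ESPnet's exact average_nbest_models implementation):

import torch

def average_checkpoints(paths):
    # Accumulate floating-point tensors in double precision; keep integer
    # buffers (e.g. step counters) from the first checkpoint as-is.
    avg, dtypes, n = None, {}, len(paths)
    for p in paths:
        state = torch.load(p, map_location="cpu")
        if avg is None:
            dtypes = {k: v.dtype for k, v in state.items()}
            avg = {k: v.double().clone() if v.is_floating_point() else v.clone()
                   for k, v in state.items()}
        else:
            for k, v in state.items():
                if avg[k].is_floating_point():
                    avg[k] += v.double()
    return {k: (v / n).to(dtypes[k]) if v.is_floating_point() else v
            for k, v in avg.items()}

# e.g. (five_best_paths chosen by valid.acc, as in the log line above):
# torch.save(average_checkpoints(five_best_paths),
#            "exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till20epoch.pth")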
+[gpua003:0/64] 2023-07-07 01:44:18,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-07 01:44:24,417 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 01:50:53,031 (trainer:732) INFO: 21epoch:train:1-100batch: iter_time=3.069, forward_time=0.137, loss_ctc=75.425, loss_att=60.964, acc=0.696, loss=65.302, backward_time=0.770, grad_norm=105.263, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.117, optim0_lr0=8.110e-05, train_time=8.297 +[gpua003:0/64] 2023-07-07 01:52:34,428 (trainer:732) INFO: 21epoch:train:101-200batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=66.860, loss_att=53.856, acc=0.695, loss=57.757, backward_time=0.754, grad_norm=87.309, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.108e-05, train_time=2.028 +[gpua003:0/64] 2023-07-07 01:54:16,169 (trainer:732) INFO: 21epoch:train:201-300batch: iter_time=1.017e-04, forward_time=0.108, loss_ctc=68.585, loss_att=49.706, acc=0.718, loss=55.369, backward_time=0.751, grad_norm=89.606, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.106e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 01:55:56,901 (trainer:732) INFO: 21epoch:train:301-400batch: iter_time=9.549e-05, forward_time=0.109, loss_ctc=83.868, loss_att=59.563, acc=0.677, loss=66.854, backward_time=0.751, grad_norm=118.007, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.104e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 01:57:38,518 (trainer:732) INFO: 21epoch:train:401-500batch: iter_time=8.947e-05, forward_time=0.108, loss_ctc=75.494, loss_att=55.952, acc=0.710, loss=61.815, backward_time=0.753, grad_norm=96.012, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.101e-05, train_time=2.032 +[gpua003:0/64] 2023-07-07 01:59:25,089 (trainer:732) INFO: 21epoch:train:501-600batch: iter_time=8.360e-05, forward_time=0.107, loss_ctc=77.597, loss_att=58.124, acc=0.678, loss=63.966, backward_time=0.760, grad_norm=95.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.099e-05, train_time=2.131 +[gpua003:0/64] 2023-07-07 02:01:14,358 (trainer:732) INFO: 21epoch:train:601-700batch: iter_time=8.784e-05, forward_time=0.108, loss_ctc=79.901, loss_att=58.953, acc=0.688, loss=65.237, backward_time=0.766, grad_norm=109.677, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.097e-05, train_time=2.185 +[gpua003:0/64] 2023-07-07 02:03:06,286 (trainer:732) INFO: 21epoch:train:701-800batch: iter_time=8.960e-05, forward_time=0.108, loss_ctc=76.140, loss_att=52.512, acc=0.702, loss=59.600, backward_time=0.764, grad_norm=108.446, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.095e-05, train_time=2.238 
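These trainer:732 progress lines are regular enough to post-process for plotting. A small stand-alone helper (a convenience script, not part of ESPnet; the field names and their order match the lines in this log):

import re

# One 100-batch progress record, e.g.:
#   "... INFO: 21epoch:train:1-100batch: iter_time=3.069, ... loss_ctc=75.425,
#    loss_att=60.964, acc=0.696, loss=65.302, ... optim0_lr0=8.110e-05, ..."
PAT = re.compile(
    r"(?P<epoch>\d+)epoch:train:(?P<batches>\d+-\d+)batch:.*?"
    r"loss_ctc=(?P<loss_ctc>[\d.]+), loss_att=(?P<loss_att>[\d.]+), "
    r"acc=(?P<acc>[\d.]+), loss=(?P<loss>[\d.]+).*?"
    r"optim0_lr0=(?P<lr>[\d.e+-]+)"
)

def parse_trainer_lines(path):
    # Yield one dict of metrics per matching log line.
    with open(path, errors="replace") as f:
        for line in f:
            m = PAT.search(line)
            if m:
                yield {k: (v if k == "batches" else float(v))
                       for k, v in m.groupdict().items()}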
+[gpua003:0/64] 2023-07-07 02:03:45,470 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-07 02:04:04,788 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-07 02:04:08,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 02:08:05,629 (trainer:732) INFO: 21epoch:train:801-900batch: iter_time=1.390, forward_time=0.152, loss_ctc=78.626, loss_att=63.039, acc=0.675, loss=67.715, backward_time=0.773, grad_norm=107.084, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.093e-05, train_time=5.987 +[gpua003:0/64] 2023-07-07 02:09:45,991 (trainer:732) INFO: 21epoch:train:901-1000batch: iter_time=9.686e-05, forward_time=0.109, loss_ctc=65.698, loss_att=52.821, acc=0.694, loss=56.684, backward_time=0.754, grad_norm=87.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.091e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 02:11:25,991 (trainer:732) INFO: 21epoch:train:1001-1100batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=70.095, loss_att=51.641, acc=0.713, loss=57.177, backward_time=0.751, grad_norm=85.639, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.089e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 02:13:05,844 (trainer:732) INFO: 21epoch:train:1101-1200batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=71.874, loss_att=52.739, acc=0.696, loss=58.480, backward_time=0.752, grad_norm=119.352, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.087e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 02:14:45,543 (trainer:732) INFO: 21epoch:train:1201-1300batch: iter_time=9.500e-05, forward_time=0.108, loss_ctc=79.532, loss_att=61.570, acc=0.679, loss=66.959, backward_time=0.750, grad_norm=83.751, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.084e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 02:16:25,354 (trainer:732) INFO: 21epoch:train:1301-1400batch: iter_time=9.425e-05, forward_time=0.108, loss_ctc=70.489, loss_att=51.580, acc=0.691, loss=57.252, backward_time=0.751, grad_norm=89.038, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.082e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 02:18:05,983 (trainer:732) INFO: 21epoch:train:1401-1500batch: iter_time=9.711e-05, forward_time=0.109, loss_ctc=79.242, loss_att=59.276, acc=0.681, loss=65.266, backward_time=0.754, grad_norm=99.915, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.080e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 02:19:45,694 (trainer:732) INFO: 21epoch:train:1501-1600batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=86.448, loss_att=59.634, acc=0.687, loss=67.678, backward_time=0.752, 
grad_norm=119.579, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.078e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 02:21:03,476 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-07 02:21:22,929 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-07 02:21:26,904 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 02:26:35,736 (trainer:732) INFO: 21epoch:train:1601-1700batch: iter_time=3.035, forward_time=0.137, loss_ctc=74.922, loss_att=57.634, acc=0.681, loss=62.820, backward_time=0.764, grad_norm=103.929, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.076e-05, train_time=8.200 +[gpua003:0/64] 2023-07-07 02:28:16,442 (trainer:732) INFO: 21epoch:train:1701-1800batch: iter_time=9.394e-05, forward_time=0.110, loss_ctc=67.692, loss_att=51.540, acc=0.720, loss=56.386, backward_time=0.754, grad_norm=84.127, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.074e-05, train_time=2.015 +[gpua003:0/64] 2023-07-07 02:29:56,377 (trainer:732) INFO: 21epoch:train:1801-1900batch: iter_time=9.295e-05, forward_time=0.109, loss_ctc=67.942, loss_att=54.566, acc=0.708, loss=58.578, backward_time=0.753, grad_norm=95.960, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.072e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 02:31:36,002 (trainer:732) INFO: 21epoch:train:1901-2000batch: iter_time=8.542e-05, forward_time=0.108, loss_ctc=62.560, loss_att=45.274, acc=0.709, loss=50.460, backward_time=0.750, grad_norm=78.689, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.070e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 02:33:15,755 (trainer:732) INFO: 21epoch:train:2001-2100batch: iter_time=8.779e-05, forward_time=0.108, loss_ctc=83.888, loss_att=62.880, acc=0.695, loss=69.182, backward_time=0.752, grad_norm=111.465, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.068e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 02:34:55,601 (trainer:732) INFO: 21epoch:train:2101-2200batch: iter_time=9.295e-05, forward_time=0.108, loss_ctc=71.779, loss_att=53.269, acc=0.708, loss=58.822, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.066e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 02:36:35,519 (trainer:732) INFO: 21epoch:train:2201-2300batch: iter_time=8.719e-05, forward_time=0.109, loss_ctc=77.907, loss_att=57.920, acc=0.681, loss=63.916, backward_time=0.753, grad_norm=94.141, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.063e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 02:38:15,578 (trainer:732) INFO: 21epoch:train:2301-2400batch: 
iter_time=1.077e-04, forward_time=0.109, loss_ctc=77.449, loss_att=56.410, acc=0.699, loss=62.722, backward_time=0.751, grad_norm=114.357, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.061e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 02:40:15,411 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-07 02:40:34,895 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-07 02:40:38,741 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 02:43:57,355 (trainer:732) INFO: 21epoch:train:2401-2500batch: iter_time=1.755, forward_time=0.116, loss_ctc=73.960, loss_att=51.538, acc=0.693, loss=58.265, backward_time=0.768, grad_norm=101.777, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.059e-05, train_time=6.835 +[gpua003:0/64] 2023-07-07 02:45:38,947 (trainer:732) INFO: 21epoch:train:2501-2600batch: iter_time=1.017e-04, forward_time=0.107, loss_ctc=73.796, loss_att=60.954, acc=0.696, loss=64.807, backward_time=0.760, grad_norm=102.406, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.057e-05, train_time=2.032 +[gpua003:0/64] 2023-07-07 02:47:21,878 (trainer:732) INFO: 21epoch:train:2601-2700batch: iter_time=1.022e-04, forward_time=0.107, loss_ctc=65.924, loss_att=51.785, acc=0.701, loss=56.026, backward_time=0.752, grad_norm=94.190, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.055e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 02:49:01,878 (trainer:732) INFO: 21epoch:train:2701-2800batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=68.630, loss_att=49.530, acc=0.716, loss=55.260, backward_time=0.752, grad_norm=96.790, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.053e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 02:50:43,603 (trainer:732) INFO: 21epoch:train:2801-2900batch: iter_time=1.010e-04, forward_time=0.109, loss_ctc=78.316, loss_att=57.443, acc=0.680, loss=63.705, backward_time=0.757, grad_norm=110.859, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.051e-05, train_time=2.034 +[gpua003:0/64] 2023-07-07 02:52:23,596 (trainer:732) INFO: 21epoch:train:2901-3000batch: iter_time=9.699e-05, forward_time=0.110, loss_ctc=74.531, loss_att=55.316, acc=0.713, loss=61.081, backward_time=0.754, grad_norm=91.694, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.049e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 02:54:03,277 (trainer:732) INFO: 21epoch:train:3001-3100batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=72.826, loss_att=52.947, acc=0.686, loss=58.911, backward_time=0.751, grad_norm=96.991, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, 
optim0_lr0=8.047e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 02:55:45,673 (trainer:732) INFO: 21epoch:train:3101-3200batch: iter_time=1.044e-04, forward_time=0.128, loss_ctc=79.741, loss_att=59.681, acc=0.688, loss=65.699, backward_time=0.753, grad_norm=105.684, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.045e-05, train_time=2.048 +[gpua003:0/64] 2023-07-07 02:57:26,045 (trainer:732) INFO: 21epoch:train:3201-3300batch: iter_time=4.703e-04, forward_time=0.110, loss_ctc=72.892, loss_att=50.604, acc=0.704, loss=57.290, backward_time=0.750, grad_norm=98.215, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.043e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 02:57:59,564 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-07 02:58:19,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-07 02:58:23,216 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:04:26,311 (trainer:732) INFO: 21epoch:train:3301-3400batch: iter_time=1.411, forward_time=0.113, loss_ctc=70.531, loss_att=53.598, acc=0.698, loss=58.678, backward_time=0.765, grad_norm=95.913, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.040e-05, train_time=8.405 +[gpua003:0/64] 2023-07-07 03:06:06,849 (trainer:732) INFO: 21epoch:train:3401-3500batch: iter_time=1.204e-04, forward_time=0.109, loss_ctc=66.577, loss_att=55.002, acc=0.685, loss=58.474, backward_time=0.753, grad_norm=97.139, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.038e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 03:07:46,882 (trainer:732) INFO: 21epoch:train:3501-3600batch: iter_time=1.055e-04, forward_time=0.108, loss_ctc=68.566, loss_att=48.282, acc=0.717, loss=54.367, backward_time=0.754, grad_norm=84.774, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.036e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 03:09:26,695 (trainer:732) INFO: 21epoch:train:3601-3700batch: iter_time=1.129e-04, forward_time=0.109, loss_ctc=76.925, loss_att=57.566, acc=0.683, loss=63.374, backward_time=0.753, grad_norm=117.953, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.034e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 03:11:06,310 (trainer:732) INFO: 21epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.107, loss_ctc=70.698, loss_att=53.316, acc=0.704, loss=58.530, backward_time=0.751, grad_norm=94.370, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.032e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 03:12:46,196 (trainer:732) INFO: 21epoch:train:3801-3900batch: iter_time=1.218e-04, forward_time=0.109, loss_ctc=74.676, loss_att=54.150, 
acc=0.689, loss=60.307, backward_time=0.752, grad_norm=84.395, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.030e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:14:25,952 (trainer:732) INFO: 21epoch:train:3901-4000batch: iter_time=9.952e-05, forward_time=0.107, loss_ctc=79.508, loss_att=60.374, acc=0.682, loss=66.114, backward_time=0.752, grad_norm=109.277, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.028e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:16:05,888 (trainer:732) INFO: 21epoch:train:4001-4100batch: iter_time=8.976e-05, forward_time=0.108, loss_ctc=76.068, loss_att=53.950, acc=0.697, loss=60.586, backward_time=0.754, grad_norm=109.443, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.026e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 03:17:13,032 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-07 03:17:32,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-07 03:17:35,678 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:21:24,869 (trainer:732) INFO: 21epoch:train:4101-4200batch: iter_time=2.108, forward_time=0.163, loss_ctc=73.138, loss_att=59.185, acc=0.677, loss=63.371, backward_time=0.766, grad_norm=100.088, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.115, optim0_lr0=8.024e-05, train_time=6.378 +[gpua003:0/64] 2023-07-07 03:23:05,523 (trainer:732) INFO: 21epoch:train:4201-4300batch: iter_time=9.623e-05, forward_time=0.109, loss_ctc=67.172, loss_att=52.075, acc=0.720, loss=56.604, backward_time=0.755, grad_norm=80.071, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.022e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 03:24:45,805 (trainer:732) INFO: 21epoch:train:4301-4400batch: iter_time=1.166e-04, forward_time=0.111, loss_ctc=66.732, loss_att=52.975, acc=0.711, loss=57.102, backward_time=0.754, grad_norm=109.947, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.020e-05, train_time=2.005 +[gpua003:0/64] 2023-07-07 03:26:25,913 (trainer:732) INFO: 21epoch:train:4401-4500batch: iter_time=1.080e-04, forward_time=0.110, loss_ctc=63.186, loss_att=44.532, acc=0.715, loss=50.128, backward_time=0.755, grad_norm=80.740, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.018e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 03:28:05,766 (trainer:732) INFO: 21epoch:train:4501-4600batch: iter_time=1.226e-04, forward_time=0.110, loss_ctc=82.770, loss_att=62.550, acc=0.696, loss=68.616, backward_time=0.753, grad_norm=110.536, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.016e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:29:48,662 
(trainer:732) INFO: 21epoch:train:4601-4700batch: iter_time=1.171e-04, forward_time=0.111, loss_ctc=71.738, loss_att=52.977, acc=0.712, loss=58.605, backward_time=0.757, grad_norm=88.053, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.014e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 03:31:28,763 (trainer:732) INFO: 21epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.110, loss_ctc=75.673, loss_att=54.196, acc=0.695, loss=60.639, backward_time=0.754, grad_norm=90.929, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.011e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 03:33:08,783 (trainer:732) INFO: 21epoch:train:4801-4900batch: iter_time=1.194e-04, forward_time=0.110, loss_ctc=76.080, loss_att=56.995, acc=0.697, loss=62.720, backward_time=0.755, grad_norm=119.887, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.009e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 03:34:49,915 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-07 03:35:09,166 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 03:35:12,760 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:39:22,173 (trainer:732) INFO: 21epoch:train:4901-5000batch: iter_time=1.327, forward_time=0.110, loss_ctc=72.216, loss_att=51.391, acc=0.693, loss=57.639, backward_time=0.762, grad_norm=97.793, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.007e-05, train_time=7.468 +[gpua003:0/64] 2023-07-07 03:41:06,488 (trainer:732) INFO: 21epoch:train:5001-5100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=70.415, loss_att=54.270, acc=0.710, loss=59.113, backward_time=0.761, grad_norm=92.209, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.005e-05, train_time=2.086 +[gpua003:0/64] 2023-07-07 03:42:46,655 (trainer:732) INFO: 21epoch:train:5101-5200batch: iter_time=9.585e-05, forward_time=0.109, loss_ctc=64.440, loss_att=51.288, acc=0.697, loss=55.233, backward_time=0.754, grad_norm=91.829, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.003e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 03:44:26,435 (trainer:732) INFO: 21epoch:train:5201-5300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=66.016, loss_att=47.983, acc=0.711, loss=53.393, backward_time=0.753, grad_norm=82.415, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.001e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:46:06,273 (trainer:732) INFO: 21epoch:train:5301-5400batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=79.568, loss_att=58.274, acc=0.689, loss=64.662, backward_time=0.752, grad_norm=101.250, clip=100.000, 
loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.999e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:47:45,977 (trainer:732) INFO: 21epoch:train:5401-5500batch: iter_time=9.211e-05, forward_time=0.108, loss_ctc=77.093, loss_att=57.211, acc=0.708, loss=63.176, backward_time=0.752, grad_norm=88.698, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.997e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 03:49:25,739 (trainer:732) INFO: 21epoch:train:5501-5600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=70.776, loss_att=51.574, acc=0.689, loss=57.334, backward_time=0.752, grad_norm=92.327, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.995e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:51:05,561 (trainer:732) INFO: 21epoch:train:5601-5700batch: iter_time=9.823e-05, forward_time=0.109, loss_ctc=78.570, loss_att=59.212, acc=0.684, loss=65.020, backward_time=0.752, grad_norm=102.663, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.993e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 03:52:45,407 (trainer:732) INFO: 21epoch:train:5701-5800batch: iter_time=1.062e-04, forward_time=0.108, loss_ctc=74.565, loss_att=51.158, acc=0.691, loss=58.180, backward_time=0.753, grad_norm=101.119, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.991e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:53:20,039 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 03:53:39,181 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 03:53:42,824 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:58:59,477 (trainer:732) INFO: 21epoch:train:5801-5900batch: iter_time=1.377, forward_time=0.109, loss_ctc=68.222, loss_att=54.113, acc=0.702, loss=58.346, backward_time=0.768, grad_norm=85.484, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.989e-05, train_time=7.481 +[gpua003:0/64] 2023-07-07 04:00:40,031 (trainer:732) INFO: 21epoch:train:5901-6000batch: iter_time=9.701e-05, forward_time=0.108, loss_ctc=67.129, loss_att=54.573, acc=0.697, loss=58.340, backward_time=0.753, grad_norm=88.099, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.987e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 04:02:20,207 (trainer:732) INFO: 21epoch:train:6001-6100batch: iter_time=9.362e-05, forward_time=0.109, loss_ctc=68.337, loss_att=48.416, acc=0.728, loss=54.392, backward_time=0.752, grad_norm=86.732, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.985e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 04:04:01,464 (trainer:732) INFO: 21epoch:train:6101-6200batch: iter_time=9.073e-05, 
forward_time=0.109, loss_ctc=75.111, loss_att=55.799, acc=0.695, loss=61.593, backward_time=0.761, grad_norm=94.382, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.983e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 04:05:41,365 (trainer:732) INFO: 21epoch:train:6201-6300batch: iter_time=9.518e-05, forward_time=0.109, loss_ctc=69.408, loss_att=51.348, acc=0.719, loss=56.766, backward_time=0.752, grad_norm=91.702, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.981e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 04:07:21,149 (trainer:732) INFO: 21epoch:train:6301-6400batch: iter_time=9.310e-05, forward_time=0.109, loss_ctc=72.354, loss_att=53.860, acc=0.695, loss=59.408, backward_time=0.752, grad_norm=82.091, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.979e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 04:09:01,102 (trainer:732) INFO: 21epoch:train:6401-6500batch: iter_time=9.926e-05, forward_time=0.109, loss_ctc=78.032, loss_att=54.051, acc=0.697, loss=61.245, backward_time=0.753, grad_norm=110.132, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.977e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 04:10:40,790 (trainer:732) INFO: 21epoch:train:6501-6600batch: iter_time=9.807e-05, forward_time=0.107, loss_ctc=78.055, loss_att=56.847, acc=0.705, loss=63.209, backward_time=0.752, grad_norm=115.245, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.975e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:11:49,314 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 04:12:08,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 04:12:11,997 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:15:49,294 (trainer:732) INFO: 21epoch:train:6601-6700batch: iter_time=1.324, forward_time=0.109, loss_ctc=74.801, loss_att=60.444, acc=0.679, loss=64.751, backward_time=0.766, grad_norm=105.116, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.973e-05, train_time=6.170 +[gpua003:0/64] 2023-07-07 04:17:31,076 (trainer:732) INFO: 21epoch:train:6701-6800batch: iter_time=9.790e-05, forward_time=0.111, loss_ctc=67.289, loss_att=50.540, acc=0.720, loss=55.565, backward_time=0.757, grad_norm=81.960, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.971e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 04:19:10,966 (trainer:732) INFO: 21epoch:train:6801-6900batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=67.753, loss_att=53.586, acc=0.710, loss=57.836, backward_time=0.751, grad_norm=85.772, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.969e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-07 04:20:51,192 (trainer:732) INFO: 21epoch:train:6901-7000batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=62.269, loss_att=43.393, acc=0.718, loss=49.056, backward_time=0.752, grad_norm=94.628, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.967e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 04:22:31,278 (trainer:732) INFO: 21epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=82.553, loss_att=62.057, acc=0.701, loss=68.206, backward_time=0.752, grad_norm=96.053, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.965e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 04:24:11,011 (trainer:732) INFO: 21epoch:train:7101-7200batch: iter_time=9.769e-05, forward_time=0.109, loss_ctc=73.617, loss_att=53.857, acc=0.710, loss=59.785, backward_time=0.752, grad_norm=106.016, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.963e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:25:51,930 (trainer:732) INFO: 21epoch:train:7201-7300batch: iter_time=2.744e-04, forward_time=0.119, loss_ctc=71.583, loss_att=52.769, acc=0.697, loss=58.413, backward_time=0.752, grad_norm=92.085, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.961e-05, train_time=2.018 +[gpua003:0/64] 2023-07-07 04:27:31,763 (trainer:732) INFO: 21epoch:train:7301-7400batch: iter_time=9.744e-05, forward_time=0.109, loss_ctc=78.262, loss_att=56.400, acc=0.700, loss=62.958, backward_time=0.753, grad_norm=108.333, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.959e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 04:29:11,616 (trainer:732) INFO: 21epoch:train:7401-7500batch: iter_time=9.512e-05, forward_time=0.109, loss_ctc=72.795, loss_att=51.382, acc=0.699, loss=57.806, backward_time=0.752, grad_norm=106.912, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.957e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 04:29:19,879 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-07 04:29:39,014 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:29:44,220 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:29:44,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-07 04:29:44,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:34:49,745 (trainer:732) INFO: 21epoch:train:7501-7600batch: iter_time=1.925, forward_time=0.158, loss_ctc=69.045, loss_att=53.787, acc=0.717, loss=58.364, backward_time=0.770, grad_norm=91.648, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.955e-05, train_time=6.762 +[gpua003:0/64] 2023-07-07 04:36:30,383 (trainer:732) INFO: 21epoch:train:7601-7700batch: iter_time=9.981e-05, forward_time=0.109, loss_ctc=65.367, loss_att=51.408, acc=0.707, loss=55.596, backward_time=0.753, grad_norm=96.250, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.952e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 04:38:11,636 (trainer:732) INFO: 21epoch:train:7701-7800batch: iter_time=1.030e-04, forward_time=0.110, loss_ctc=65.290, loss_att=46.653, acc=0.718, loss=52.244, backward_time=0.752, grad_norm=97.699, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.950e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 04:39:52,657 (trainer:732) INFO: 21epoch:train:7801-7900batch: iter_time=9.248e-05, forward_time=0.116, loss_ctc=79.587, loss_att=57.577, acc=0.697, loss=64.180, backward_time=0.752, grad_norm=90.219, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.948e-05, train_time=2.020 +[gpua003:0/64] 2023-07-07 04:41:35,649 (trainer:732) INFO: 21epoch:train:7901-8000batch: iter_time=6.759e-04, forward_time=0.130, loss_ctc=75.540, loss_att=56.684, acc=0.718, loss=62.341, backward_time=0.758, grad_norm=87.590, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.946e-05, train_time=2.060 +[gpua003:0/64] 2023-07-07 04:43:22,167 (trainer:732) INFO: 21epoch:train:8001-8100batch: iter_time=1.011e-04, forward_time=0.149, loss_ctc=71.159, loss_att=52.801, acc=0.693, loss=58.309, backward_time=0.770, grad_norm=152.348, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.944e-05, train_time=2.130 +[gpua003:0/64] 2023-07-07 04:45:05,978 (trainer:732) INFO: 21epoch:train:8101-8200batch: iter_time=9.418e-05, forward_time=0.140, loss_ctc=76.922, loss_att=55.656, acc=0.697, loss=62.036, backward_time=0.756, grad_norm=115.315, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.942e-05, train_time=2.076 +[gpua003:0/64] 2023-07-07 04:46:48,173 (trainer:732) INFO: 21epoch:train:8201-8300batch: iter_time=1.139e-04, forward_time=0.112, loss_ctc=71.045, loss_att=50.537, acc=0.700, loss=56.690, backward_time=0.751, grad_norm=97.377, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, 
optim0_lr0=7.940e-05, train_time=2.044 +[gpua003:0/64] 2023-07-07 04:47:40,909 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-07 04:48:00,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:48:04,000 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:48:04,001 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-07 04:48:04,007 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:53:34,303 (trainer:732) INFO: 21epoch:train:8301-8400batch: iter_time=2.928, forward_time=0.156, loss_ctc=74.000, loss_att=62.198, acc=0.690, loss=65.739, backward_time=0.775, grad_norm=93.122, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.938e-05, train_time=8.122 +[gpua003:0/64] 2023-07-07 04:55:17,196 (trainer:732) INFO: 21epoch:train:8401-8500batch: iter_time=1.028e-04, forward_time=0.111, loss_ctc=65.302, loss_att=52.812, acc=0.700, loss=56.559, backward_time=0.756, grad_norm=78.595, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.936e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 04:56:57,424 (trainer:732) INFO: 21epoch:train:8501-8600batch: iter_time=9.658e-05, forward_time=0.108, loss_ctc=68.684, loss_att=51.115, acc=0.721, loss=56.385, backward_time=0.750, grad_norm=82.716, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.934e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 04:58:38,276 (trainer:732) INFO: 21epoch:train:8601-8700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=69.325, loss_att=51.087, acc=0.707, loss=56.559, backward_time=0.752, grad_norm=99.216, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.932e-05, train_time=2.017 +[gpua003:0/64] 2023-07-07 05:00:18,316 (trainer:732) INFO: 21epoch:train:8701-8800batch: iter_time=9.697e-05, forward_time=0.109, loss_ctc=73.945, loss_att=57.455, acc=0.694, loss=62.402, backward_time=0.753, grad_norm=89.756, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.930e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 05:01:58,070 (trainer:732) INFO: 21epoch:train:8801-8900batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.106, loss_att=52.451, acc=0.702, loss=57.747, backward_time=0.753, grad_norm=85.297, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.928e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 05:03:37,866 (trainer:732) INFO: 21epoch:train:8901-9000batch: iter_time=9.608e-05, forward_time=0.108, loss_ctc=76.088, loss_att=58.045, acc=0.688, loss=63.458, backward_time=0.752, grad_norm=106.493, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.926e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 05:05:17,660 (trainer:732) INFO: 21epoch:train:9001-9100batch: iter_time=9.909e-05, forward_time=0.108, loss_ctc=80.044, loss_att=57.867, 
acc=0.695, loss=64.520, backward_time=0.752, grad_norm=118.907, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.924e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 05:06:43,861 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-07 05:07:03,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 05:07:07,163 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 05:07:07,164 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-07 05:07:07,170 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 05:11:58,302 (trainer:732) INFO: 21epoch:train:9101-9200batch: iter_time=1.887, forward_time=0.165, loss_ctc=73.392, loss_att=56.440, acc=0.687, loss=61.525, backward_time=0.766, grad_norm=116.633, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.115, optim0_lr0=7.922e-05, train_time=8.012 +[gpua003:0/64] 2023-07-07 05:13:38,749 (trainer:732) INFO: 21epoch:train:9201-9300batch: iter_time=9.300e-05, forward_time=0.107, loss_ctc=67.815, loss_att=50.596, acc=0.716, loss=55.762, backward_time=0.753, grad_norm=79.997, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.920e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 05:15:19,209 (trainer:732) INFO: 21epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.107, loss_ctc=66.582, loss_att=52.925, acc=0.708, loss=57.022, backward_time=0.752, grad_norm=93.157, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.919e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 05:16:59,576 (trainer:732) INFO: 21epoch:train:9401-9500batch: iter_time=9.374e-05, forward_time=0.107, loss_ctc=61.813, loss_att=45.211, acc=0.710, loss=50.192, backward_time=0.752, grad_norm=86.096, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.917e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 05:18:40,211 (trainer:732) INFO: 21epoch:train:9501-9600batch: iter_time=9.173e-05, forward_time=0.107, loss_ctc=81.797, loss_att=61.241, acc=0.697, loss=67.408, backward_time=0.752, grad_norm=95.878, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.111, optim0_lr0=7.915e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 05:20:20,733 (trainer:732) INFO: 21epoch:train:9601-9700batch: iter_time=9.476e-05, forward_time=0.107, loss_ctc=72.431, loss_att=53.032, acc=0.704, loss=58.852, backward_time=0.752, grad_norm=86.451, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.913e-05, train_time=2.010 +[gpua003:0/64] 2023-07-07 05:22:00,233 (trainer:732) INFO: 21epoch:train:9701-9800batch: iter_time=9.436e-05, forward_time=0.107, loss_ctc=72.639, loss_att=53.426, acc=0.691, loss=59.190, backward_time=0.751, grad_norm=95.931, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.911e-05, train_time=1.990 +[gpua003:0/64] 2023-07-07 05:23:39,817 
(trainer:732) INFO: 21epoch:train:9801-9900batch: iter_time=8.872e-05, forward_time=0.108, loss_ctc=76.251, loss_att=59.146, acc=0.691, loss=64.277, backward_time=0.752, grad_norm=114.109, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.909e-05, train_time=1.991 +[gpua003:0/64] 2023-07-07 05:25:19,445 (trainer:732) INFO: 21epoch:train:9901-10000batch: iter_time=9.037e-05, forward_time=0.107, loss_ctc=73.224, loss_att=50.857, acc=0.694, loss=57.567, backward_time=0.752, grad_norm=105.676, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.907e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 05:38:40,911 (trainer:338) INFO: 21epoch results: [train] iter_time=0.235, forward_time=0.113, loss_ctc=72.893, loss_att=54.497, acc=0.699, loss=60.016, backward_time=0.755, grad_norm=97.715, clip=100.000, loss_scale=2.398e+19, optim_step_time=0.113, optim0_lr0=8.007e-05, train_time=2.656, time=3 hours, 41 minutes and 44.21 seconds, total_count=180000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.186, cer_ctc=0.290, loss_att=41.004, acc=0.654, cer=0.409, wer=0.994, loss=43.759, time=6 minutes and 51.3 seconds, total_count=18722, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 7.7 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-07 05:38:59,977 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-07 05:39:00,026 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/14epoch.pth +[gpua003:0/64] 2023-07-07 05:39:00,083 (trainer:272) INFO: 22/100epoch started. Estimated time to finish: 1 week, 5 days and 17 hours +[gpua003:0/64] 2023-07-07 05:39:01,612 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
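The "Building Nth iter-factory" messages that punctuate this log (one roughly every 800-900 batches, matching 10000 logged batches per epoch spread over 12 shards) come from --multiple_iterator true: the training data was pre-split into 12 shards (splits12/split.0 through split.11), and each epoch streams the shards one at a time in a freshly shuffled order, as the split numbers in the dataset blocks below show. A rough sketch of that pattern in Python, with `shards` as a hypothetical list of per-split batch iterables (illustrative only, not ESPnet's implementation):

import random

def epoch_batches(shards, epoch):
    # Visit every shard exactly once per epoch, in an order reshuffled
    # each epoch, streaming one shard at a time so only one split's
    # batches need to be materialized at once.
    order = list(range(len(shards)))
    random.Random(epoch).shuffle(order)
    for i in order:
        yield from shards[i]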
+[gpua003:0/64] 2023-07-07 05:39:20,669 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 05:39:24,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 05:46:09,882 (trainer:732) INFO: 22epoch:train:1-100batch: iter_time=3.232, forward_time=0.130, loss_ctc=75.959, loss_att=57.589, acc=0.700, loss=63.100, backward_time=0.770, grad_norm=94.575, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.905e-05, train_time=8.584 +[gpua003:0/64] 2023-07-07 05:47:51,050 (trainer:732) INFO: 22epoch:train:101-200batch: iter_time=9.855e-05, forward_time=0.109, loss_ctc=68.208, loss_att=52.446, acc=0.682, loss=57.174, backward_time=0.756, grad_norm=97.364, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.903e-05, train_time=2.023 +[gpua003:0/64] 2023-07-07 05:49:30,923 (trainer:732) INFO: 22epoch:train:201-300batch: iter_time=9.754e-05, forward_time=0.110, loss_ctc=76.840, loss_att=59.608, acc=0.719, loss=64.777, backward_time=0.753, grad_norm=95.370, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.901e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 05:51:10,776 (trainer:732) INFO: 22epoch:train:301-400batch: iter_time=9.949e-05, forward_time=0.110, loss_ctc=77.024, loss_att=65.093, acc=0.681, loss=68.673, backward_time=0.752, grad_norm=106.966, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.899e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 05:52:55,638 (trainer:732) INFO: 22epoch:train:401-500batch: iter_time=9.878e-05, forward_time=0.109, loss_ctc=70.205, loss_att=56.739, acc=0.696, loss=60.779, backward_time=0.761, grad_norm=90.649, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.897e-05, train_time=2.097 +[gpua003:0/64] 2023-07-07 05:54:39,214 (trainer:732) INFO: 22epoch:train:501-600batch: iter_time=1.011e-04, forward_time=0.109, loss_ctc=67.033, loss_att=51.629, acc=0.698, loss=56.250, backward_time=0.754, grad_norm=93.014, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.895e-05, train_time=2.071 +[gpua003:0/64] 2023-07-07 05:56:20,364 (trainer:732) INFO: 22epoch:train:601-700batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=65.903, loss_att=46.926, acc=0.685, loss=52.619, backward_time=0.751, grad_norm=81.828, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.893e-05, train_time=2.023 +[gpua003:0/64] 2023-07-07 05:58:00,763 (trainer:732) INFO: 22epoch:train:701-800batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=77.598, loss_att=57.244, acc=0.698, loss=63.350, backward_time=0.751, grad_norm=97.156, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.891e-05, train_time=2.008 
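The loss column in the trainer lines is consistent with ESPnet's hybrid CTC/attention objective, loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att with ctc_weight = 0.3; the weight is inferred from the logged numbers here, since the training config itself is not reproduced in this log. A minimal check against the entries above:

def combined_loss(loss_ctc, loss_att, ctc_weight=0.3):
    # Assumed hybrid CTC/attention combination; ctc_weight=0.3 is inferred
    # from the logged values, not read from the config file.
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# 22epoch:train:701-800batch above: loss_ctc=77.598, loss_att=57.244, loss=63.350
assert abs(combined_loss(77.598, 57.244) - 63.350) < 5e-3
# 21epoch results: loss_ctc=72.893, loss_att=54.497, loss=60.016
assert abs(combined_loss(72.893, 54.497) - 60.016) < 5e-3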
+[gpua003:0/64] 2023-07-07 05:58:40,682 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-07 05:58:59,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-07 05:59:03,218 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:03:49,036 (trainer:732) INFO: 22epoch:train:801-900batch: iter_time=1.356, forward_time=0.139, loss_ctc=73.151, loss_att=56.666, acc=0.700, loss=61.612, backward_time=0.769, grad_norm=98.640, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.889e-05, train_time=6.965 +[gpua003:0/64] 2023-07-07 06:05:29,630 (trainer:732) INFO: 22epoch:train:901-1000batch: iter_time=9.770e-05, forward_time=0.110, loss_ctc=68.300, loss_att=50.098, acc=0.687, loss=55.559, backward_time=0.754, grad_norm=100.634, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.887e-05, train_time=2.012 +[gpua003:0/64] 2023-07-07 06:07:09,505 (trainer:732) INFO: 22epoch:train:1001-1100batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.251, loss_att=59.729, acc=0.702, loss=63.786, backward_time=0.752, grad_norm=99.591, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.885e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:08:52,104 (trainer:732) INFO: 22epoch:train:1101-1200batch: iter_time=1.038e-04, forward_time=0.109, loss_ctc=71.657, loss_att=55.743, acc=0.704, loss=60.517, backward_time=0.765, grad_norm=85.238, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.883e-05, train_time=2.052 +[gpua003:0/64] 2023-07-07 06:10:32,190 (trainer:732) INFO: 22epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=76.079, loss_att=65.762, acc=0.682, loss=68.857, backward_time=0.753, grad_norm=102.836, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.881e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 06:12:12,065 (trainer:732) INFO: 22epoch:train:1301-1400batch: iter_time=9.626e-05, forward_time=0.109, loss_ctc=69.609, loss_att=55.901, acc=0.688, loss=60.014, backward_time=0.753, grad_norm=115.398, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.879e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:13:52,048 (trainer:732) INFO: 22epoch:train:1401-1500batch: iter_time=8.880e-05, forward_time=0.109, loss_ctc=61.548, loss_att=44.239, acc=0.684, loss=49.431, backward_time=0.753, grad_norm=108.769, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.877e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 06:15:31,817 (trainer:732) INFO: 22epoch:train:1501-1600batch: iter_time=9.085e-05, forward_time=0.109, loss_ctc=73.610, loss_att=51.494, acc=0.698, loss=58.129, 
backward_time=0.753, grad_norm=95.261, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.875e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 06:16:54,392 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-07 06:17:13,867 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-07 06:17:17,506 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:21:41,543 (trainer:732) INFO: 22epoch:train:1601-1700batch: iter_time=2.544, forward_time=0.118, loss_ctc=71.770, loss_att=56.335, acc=0.704, loss=60.965, backward_time=0.763, grad_norm=88.321, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.873e-05, train_time=7.394 +[gpua003:0/64] 2023-07-07 06:23:24,648 (trainer:732) INFO: 22epoch:train:1701-1800batch: iter_time=8.832e-05, forward_time=0.108, loss_ctc=72.678, loss_att=52.151, acc=0.698, loss=58.309, backward_time=0.758, grad_norm=95.685, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.871e-05, train_time=2.062 +[gpua003:0/64] 2023-07-07 06:25:06,463 (trainer:732) INFO: 22epoch:train:1801-1900batch: iter_time=9.287e-05, forward_time=0.122, loss_ctc=66.852, loss_att=52.819, acc=0.716, loss=57.029, backward_time=0.755, grad_norm=93.289, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.869e-05, train_time=2.036 +[gpua003:0/64] 2023-07-07 06:26:47,146 (trainer:732) INFO: 22epoch:train:1901-2000batch: iter_time=1.001e-04, forward_time=0.114, loss_ctc=77.777, loss_att=64.279, acc=0.708, loss=68.329, backward_time=0.754, grad_norm=107.242, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.867e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 06:28:27,163 (trainer:732) INFO: 22epoch:train:2001-2100batch: iter_time=9.088e-05, forward_time=0.109, loss_ctc=75.746, loss_att=62.285, acc=0.691, loss=66.323, backward_time=0.754, grad_norm=93.071, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.865e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 06:30:09,221 (trainer:732) INFO: 22epoch:train:2101-2200batch: iter_time=8.882e-05, forward_time=0.123, loss_ctc=70.706, loss_att=58.251, acc=0.693, loss=61.987, backward_time=0.758, grad_norm=99.862, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.863e-05, train_time=2.041 +[gpua003:0/64] 2023-07-07 06:31:49,013 (trainer:732) INFO: 22epoch:train:2201-2300batch: iter_time=8.759e-05, forward_time=0.108, loss_ctc=56.761, loss_att=45.017, acc=0.696, loss=48.540, backward_time=0.752, grad_norm=80.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.862e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 06:33:28,852 (trainer:732) INFO: 
22epoch:train:2301-2400batch: iter_time=9.062e-05, forward_time=0.108, loss_ctc=69.849, loss_att=49.995, acc=0.703, loss=55.951, backward_time=0.751, grad_norm=112.615, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.860e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:35:09,707 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-07 06:35:28,850 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-07 06:35:32,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:39:06,065 (trainer:732) INFO: 22epoch:train:2401-2500batch: iter_time=1.292, forward_time=0.109, loss_ctc=71.798, loss_att=50.317, acc=0.717, loss=56.761, backward_time=0.759, grad_norm=95.019, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.858e-05, train_time=6.744 +[gpua003:0/64] 2023-07-07 06:40:51,328 (trainer:732) INFO: 22epoch:train:2501-2600batch: iter_time=1.031e-04, forward_time=0.109, loss_ctc=75.859, loss_att=56.028, acc=0.710, loss=61.978, backward_time=0.760, grad_norm=95.952, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.856e-05, train_time=2.105 +[gpua003:0/64] 2023-07-07 06:42:31,354 (trainer:732) INFO: 22epoch:train:2601-2700batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=66.166, loss_att=50.393, acc=0.695, loss=55.125, backward_time=0.751, grad_norm=86.983, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.854e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 06:44:10,962 (trainer:732) INFO: 22epoch:train:2701-2800batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=77.042, loss_att=58.748, acc=0.721, loss=64.236, backward_time=0.750, grad_norm=99.652, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.852e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 06:45:50,837 (trainer:732) INFO: 22epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=73.184, loss_att=61.853, acc=0.694, loss=65.253, backward_time=0.752, grad_norm=101.544, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.850e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 06:47:30,553 (trainer:732) INFO: 22epoch:train:2901-3000batch: iter_time=1.154e-04, forward_time=0.109, loss_ctc=70.952, loss_att=57.515, acc=0.703, loss=61.546, backward_time=0.751, grad_norm=97.436, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.848e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 06:49:10,499 (trainer:732) INFO: 22epoch:train:3001-3100batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=62.039, loss_att=47.304, acc=0.712, loss=51.724, backward_time=0.753, grad_norm=90.810, clip=100.000, loss_scale=7.379e+19, 
optim_step_time=0.113, optim0_lr0=7.846e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 06:50:50,138 (trainer:732) INFO: 22epoch:train:3101-3200batch: iter_time=1.267e-04, forward_time=0.109, loss_ctc=66.663, loss_att=46.048, acc=0.693, loss=52.232, backward_time=0.750, grad_norm=86.756, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.844e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 06:52:32,525 (trainer:732) INFO: 22epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.110, loss_ctc=72.678, loss_att=54.618, acc=0.711, loss=60.036, backward_time=0.757, grad_norm=104.682, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.842e-05, train_time=2.048 +[gpua003:0/64] 2023-07-07 06:53:06,741 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-07 06:53:26,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-07 06:53:30,022 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 06:57:58,983 (trainer:732) INFO: 22epoch:train:3301-3400batch: iter_time=1.348, forward_time=0.145, loss_ctc=72.094, loss_att=56.366, acc=0.703, loss=61.085, backward_time=0.769, grad_norm=89.951, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.840e-05, train_time=6.529 +[gpua003:0/64] 2023-07-07 06:59:39,298 (trainer:732) INFO: 22epoch:train:3401-3500batch: iter_time=1.038e-04, forward_time=0.110, loss_ctc=66.447, loss_att=49.854, acc=0.686, loss=54.832, backward_time=0.753, grad_norm=93.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.838e-05, train_time=2.006 +[gpua003:0/64] 2023-07-07 07:01:19,264 (trainer:732) INFO: 22epoch:train:3501-3600batch: iter_time=9.217e-05, forward_time=0.109, loss_ctc=75.507, loss_att=61.068, acc=0.704, loss=65.400, backward_time=0.751, grad_norm=96.972, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.836e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 07:02:59,198 (trainer:732) INFO: 22epoch:train:3601-3700batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=70.999, loss_att=57.684, acc=0.705, loss=61.678, backward_time=0.752, grad_norm=85.253, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.834e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 07:04:38,930 (trainer:732) INFO: 22epoch:train:3701-3800batch: iter_time=1.136e-04, forward_time=0.109, loss_ctc=72.958, loss_att=61.007, acc=0.688, loss=64.592, backward_time=0.750, grad_norm=110.731, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.833e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 07:06:18,662 (trainer:732) INFO: 22epoch:train:3801-3900batch: iter_time=1.150e-04, forward_time=0.109, 
loss_ctc=69.973, loss_att=57.379, acc=0.688, loss=61.157, backward_time=0.751, grad_norm=104.773, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.831e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 07:07:58,470 (trainer:732) INFO: 22epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.109, loss_ctc=58.578, loss_att=42.394, acc=0.697, loss=47.249, backward_time=0.752, grad_norm=78.336, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.829e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 07:09:38,304 (trainer:732) INFO: 22epoch:train:4001-4100batch: iter_time=9.816e-05, forward_time=0.108, loss_ctc=72.183, loss_att=51.473, acc=0.699, loss=57.686, backward_time=0.753, grad_norm=90.776, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.827e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 07:10:44,632 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-07 07:11:04,150 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-07 07:11:07,735 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 07:14:13,080 (trainer:732) INFO: 22epoch:train:4101-4200batch: iter_time=1.318, forward_time=0.108, loss_ctc=71.941, loss_att=54.810, acc=0.713, loss=59.949, backward_time=0.764, grad_norm=86.300, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.825e-05, train_time=5.495 +[gpua003:0/64] 2023-07-07 07:15:54,008 (trainer:732) INFO: 22epoch:train:4201-4300batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=70.358, loss_att=50.448, acc=0.689, loss=56.421, backward_time=0.755, grad_norm=99.875, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.823e-05, train_time=2.018 +[gpua003:0/64] 2023-07-07 07:17:35,127 (trainer:732) INFO: 22epoch:train:4301-4400batch: iter_time=1.074e-04, forward_time=0.108, loss_ctc=70.078, loss_att=57.577, acc=0.697, loss=61.327, backward_time=0.751, grad_norm=93.447, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.821e-05, train_time=2.022 +[gpua003:0/64] 2023-07-07 07:19:17,039 (trainer:732) INFO: 22epoch:train:4401-4500batch: iter_time=8.866e-05, forward_time=0.108, loss_ctc=73.720, loss_att=60.764, acc=0.703, loss=64.650, backward_time=0.759, grad_norm=98.011, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.819e-05, train_time=2.038 +[gpua003:0/64] 2023-07-07 07:20:57,799 (trainer:732) INFO: 22epoch:train:4501-4600batch: iter_time=9.315e-05, forward_time=0.110, loss_ctc=74.002, loss_att=60.934, acc=0.685, loss=64.855, backward_time=0.754, grad_norm=99.614, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.817e-05, train_time=2.015 
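The loss_scale jump from 7.379e+19 to 1.476e+20 inside the 4001-4100 block above is an exact doubling, and every loss_scale value in this log is a power of two (1.845e+19 = 2**64 up through 2.951e+20 = 2**68). That is the signature of dynamic loss scaling in mixed-precision training: the scale doubles after a fixed run of overflow-free steps and is halved when gradients overflow. A sketch of the standard PyTorch AMP loop with those defaults (placeholder model/optimizer/batch, not the ESPnet trainer's internals):

import torch

# growth_factor=2.0 and backoff_factor=0.5 yield exactly the power-of-two
# loss_scale trajectory seen in this log.
scaler = torch.cuda.amp.GradScaler(growth_factor=2.0, backoff_factor=0.5,
                                   growth_interval=2000)

def amp_step(model, optimizer, batch):
    with torch.autocast("cuda"):
        loss = model(batch)          # forward pass in reduced precision
    scaler.scale(loss).backward()    # backward on the scaled loss
    scaler.step(optimizer)           # skipped if inf/nan gradients are found
    scaler.update()                  # grows or backs off the scale
    optimizer.zero_grad()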
+[gpua003:0/64] 2023-07-07 07:22:39,059 (trainer:732) INFO: 22epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.109, loss_ctc=69.200, loss_att=56.517, acc=0.691, loss=60.322, backward_time=0.752, grad_norm=101.222, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.815e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 07:24:20,772 (trainer:732) INFO: 22epoch:train:4701-4800batch: iter_time=1.095e-04, forward_time=0.109, loss_ctc=61.538, loss_att=45.833, acc=0.689, loss=50.545, backward_time=0.754, grad_norm=95.781, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.813e-05, train_time=2.034 +[gpua003:0/64] 2023-07-07 07:26:02,965 (trainer:732) INFO: 22epoch:train:4801-4900batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=69.082, loss_att=51.514, acc=0.696, loss=56.785, backward_time=0.752, grad_norm=95.683, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.811e-05, train_time=2.044 +[gpua003:0/64] 2023-07-07 07:27:49,767 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-07 07:28:09,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-07 07:28:12,933 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 07:31:57,955 (trainer:732) INFO: 22epoch:train:4901-5000batch: iter_time=2.503, forward_time=0.127, loss_ctc=69.914, loss_att=52.926, acc=0.705, loss=58.022, backward_time=0.762, grad_norm=90.567, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.810e-05, train_time=7.100 +[gpua003:0/64] 2023-07-07 07:33:40,006 (trainer:732) INFO: 22epoch:train:5001-5100batch: iter_time=1.018e-04, forward_time=0.109, loss_ctc=75.894, loss_att=56.058, acc=0.706, loss=62.009, backward_time=0.760, grad_norm=95.605, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.808e-05, train_time=2.041 +[gpua003:0/64] 2023-07-07 07:35:20,731 (trainer:732) INFO: 22epoch:train:5101-5200batch: iter_time=1.041e-04, forward_time=0.108, loss_ctc=65.260, loss_att=50.518, acc=0.693, loss=54.941, backward_time=0.754, grad_norm=95.597, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.806e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 07:37:03,337 (trainer:732) INFO: 22epoch:train:5201-5300batch: iter_time=9.542e-05, forward_time=0.109, loss_ctc=75.495, loss_att=58.850, acc=0.712, loss=63.844, backward_time=0.755, grad_norm=96.146, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.804e-05, train_time=2.052 +[gpua003:0/64] 2023-07-07 07:38:52,102 (trainer:732) INFO: 22epoch:train:5301-5400batch: iter_time=9.699e-05, forward_time=0.109, loss_ctc=72.248, loss_att=62.576, acc=0.684, loss=65.477, 
backward_time=0.759, grad_norm=109.912, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.802e-05, train_time=2.175 +[gpua003:0/64] 2023-07-07 07:40:47,676 (trainer:732) INFO: 22epoch:train:5401-5500batch: iter_time=9.981e-05, forward_time=0.108, loss_ctc=69.974, loss_att=56.541, acc=0.690, loss=60.571, backward_time=0.802, grad_norm=96.604, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.800e-05, train_time=2.311 +[gpua003:0/64] 2023-07-07 07:42:29,427 (trainer:732) INFO: 22epoch:train:5501-5600batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=62.758, loss_att=47.686, acc=0.702, loss=52.208, backward_time=0.762, grad_norm=88.346, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.798e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 07:44:09,193 (trainer:732) INFO: 22epoch:train:5601-5700batch: iter_time=1.001e-04, forward_time=0.109, loss_ctc=65.100, loss_att=45.891, acc=0.696, loss=51.654, backward_time=0.751, grad_norm=81.829, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.796e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 07:45:53,335 (trainer:732) INFO: 22epoch:train:5701-5800batch: iter_time=9.999e-05, forward_time=0.108, loss_ctc=74.308, loss_att=57.186, acc=0.695, loss=62.323, backward_time=0.767, grad_norm=150.463, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.794e-05, train_time=2.083 +[gpua003:0/64] 2023-07-07 07:46:33,767 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 07:46:52,851 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-07 07:46:56,477 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 07:50:29,089 (trainer:732) INFO: 22epoch:train:5801-5900batch: iter_time=1.584, forward_time=0.131, loss_ctc=74.207, loss_att=54.322, acc=0.717, loss=60.288, backward_time=0.764, grad_norm=91.726, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.792e-05, train_time=5.515 +[gpua003:0/64] 2023-07-07 07:52:09,734 (trainer:732) INFO: 22epoch:train:5901-6000batch: iter_time=9.734e-05, forward_time=0.110, loss_ctc=65.678, loss_att=47.794, acc=0.691, loss=53.160, backward_time=0.752, grad_norm=83.021, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.791e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 07:53:50,393 (trainer:732) INFO: 22epoch:train:6001-6100batch: iter_time=9.193e-05, forward_time=0.112, loss_ctc=74.747, loss_att=60.495, acc=0.721, loss=64.771, backward_time=0.755, grad_norm=90.014, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.789e-05, train_time=2.013 +[gpua003:0/64] 2023-07-07 07:55:31,643 (trainer:732) INFO: 
22epoch:train:6101-6200batch: iter_time=9.239e-05, forward_time=0.119, loss_ctc=73.282, loss_att=60.349, acc=0.698, loss=64.229, backward_time=0.756, grad_norm=88.930, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.787e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 07:57:11,271 (trainer:732) INFO: 22epoch:train:6201-6300batch: iter_time=9.293e-05, forward_time=0.108, loss_ctc=67.412, loss_att=54.664, acc=0.707, loss=58.488, backward_time=0.751, grad_norm=95.356, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.785e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 07:58:51,149 (trainer:732) INFO: 22epoch:train:6301-6400batch: iter_time=9.666e-05, forward_time=0.109, loss_ctc=66.473, loss_att=51.695, acc=0.714, loss=56.129, backward_time=0.751, grad_norm=103.423, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.783e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 08:00:31,053 (trainer:732) INFO: 22epoch:train:6401-6500batch: iter_time=9.940e-05, forward_time=0.110, loss_ctc=61.632, loss_att=44.108, acc=0.695, loss=49.365, backward_time=0.752, grad_norm=86.270, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.781e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 08:02:10,665 (trainer:732) INFO: 22epoch:train:6501-6600batch: iter_time=1.027e-04, forward_time=0.109, loss_ctc=72.139, loss_att=51.933, acc=0.712, loss=57.995, backward_time=0.750, grad_norm=96.771, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.779e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 08:03:18,465 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 08:03:37,874 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-07 08:03:41,515 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 08:07:00,984 (trainer:732) INFO: 22epoch:train:6601-6700batch: iter_time=1.390, forward_time=0.109, loss_ctc=70.332, loss_att=55.203, acc=0.706, loss=59.742, backward_time=0.765, grad_norm=97.451, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.777e-05, train_time=5.806 +[gpua003:0/64] 2023-07-07 08:08:45,065 (trainer:732) INFO: 22epoch:train:6701-6800batch: iter_time=9.209e-05, forward_time=0.108, loss_ctc=72.039, loss_att=50.836, acc=0.705, loss=57.197, backward_time=0.762, grad_norm=96.664, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.776e-05, train_time=2.081 +[gpua003:0/64] 2023-07-07 08:10:27,886 (trainer:732) INFO: 22epoch:train:6801-6900batch: iter_time=9.907e-05, forward_time=0.109, loss_ctc=66.153, loss_att=54.046, acc=0.709, loss=57.678, backward_time=0.754, grad_norm=91.951, clip=100.000, loss_scale=1.476e+20, 
optim_step_time=0.113, optim0_lr0=7.774e-05, train_time=2.056 +[gpua003:0/64] 2023-07-07 08:12:07,728 (trainer:732) INFO: 22epoch:train:6901-7000batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=76.287, loss_att=59.045, acc=0.713, loss=64.218, backward_time=0.752, grad_norm=110.971, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.772e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 08:13:52,415 (trainer:732) INFO: 22epoch:train:7001-7100batch: iter_time=9.646e-05, forward_time=0.110, loss_ctc=72.777, loss_att=62.041, acc=0.682, loss=65.261, backward_time=0.770, grad_norm=98.939, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.770e-05, train_time=2.094 +[gpua003:0/64] 2023-07-07 08:15:37,878 (trainer:732) INFO: 22epoch:train:7101-7200batch: iter_time=9.875e-05, forward_time=0.109, loss_ctc=69.184, loss_att=56.850, acc=0.686, loss=60.550, backward_time=0.767, grad_norm=104.102, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.768e-05, train_time=2.109 +[gpua003:0/64] 2023-07-07 08:17:18,027 (trainer:732) INFO: 22epoch:train:7201-7300batch: iter_time=9.466e-05, forward_time=0.108, loss_ctc=56.723, loss_att=44.561, acc=0.694, loss=48.210, backward_time=0.751, grad_norm=82.134, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.766e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 08:19:11,959 (trainer:732) INFO: 22epoch:train:7301-7400batch: iter_time=9.554e-05, forward_time=0.109, loss_ctc=68.916, loss_att=48.970, acc=0.708, loss=54.954, backward_time=0.805, grad_norm=145.408, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.764e-05, train_time=2.278 +[gpua003:0/64] 2023-07-07 08:20:53,519 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-07 08:21:12,743 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 08:21:16,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 08:23:45,003 (trainer:732) INFO: 22epoch:train:7401-7500batch: iter_time=1.329, forward_time=0.136, loss_ctc=70.359, loss_att=50.850, acc=0.717, loss=56.703, backward_time=0.762, grad_norm=84.034, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.116, optim0_lr0=7.762e-05, train_time=5.461 +[gpua003:0/64] 2023-07-07 08:25:27,825 (trainer:732) INFO: 22epoch:train:7501-7600batch: iter_time=8.708e-05, forward_time=0.110, loss_ctc=76.765, loss_att=54.087, acc=0.711, loss=60.891, backward_time=0.763, grad_norm=98.850, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.761e-05, train_time=2.056 +[gpua003:0/64] 2023-07-07 08:27:08,334 (trainer:732) INFO: 22epoch:train:7601-7700batch: iter_time=1.042e-04, forward_time=0.109, loss_ctc=64.296, loss_att=50.883, acc=0.706, loss=54.907, backward_time=0.754, grad_norm=87.185, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.759e-05, train_time=2.010 +[gpua003:0/64] 2023-07-07 08:28:48,255 (trainer:732) INFO: 22epoch:train:7701-7800batch: iter_time=8.846e-05, forward_time=0.108, loss_ctc=74.565, loss_att=60.399, acc=0.719, loss=64.649, backward_time=0.752, grad_norm=95.857, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.757e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 08:30:28,045 (trainer:732) INFO: 22epoch:train:7801-7900batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.972, loss_att=62.893, acc=0.689, loss=66.217, backward_time=0.752, grad_norm=87.398, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.755e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 08:32:07,809 (trainer:732) INFO: 22epoch:train:7901-8000batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=70.642, loss_att=57.664, acc=0.693, loss=61.557, backward_time=0.752, grad_norm=92.959, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.753e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 08:33:47,492 (trainer:732) INFO: 22epoch:train:8001-8100batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=55.497, loss_att=44.132, acc=0.708, loss=47.541, backward_time=0.752, grad_norm=80.785, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.751e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 08:35:27,353 (trainer:732) INFO: 22epoch:train:8101-8200batch: iter_time=1.061e-04, forward_time=0.111, loss_ctc=67.689, loss_att=46.225, acc=0.704, loss=52.664, backward_time=0.752, grad_norm=103.286, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.749e-05, 
train_time=1.997 +[gpua003:0/64] 2023-07-07 08:37:07,180 (trainer:732) INFO: 22epoch:train:8201-8300batch: iter_time=1.080e-04, forward_time=0.111, loss_ctc=72.512, loss_att=53.632, acc=0.717, loss=59.296, backward_time=0.752, grad_norm=82.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.747e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 08:37:49,047 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-07 08:38:08,430 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 08:38:12,349 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 08:43:42,647 (trainer:732) INFO: 22epoch:train:8301-8400batch: iter_time=2.865, forward_time=0.127, loss_ctc=76.998, loss_att=56.248, acc=0.715, loss=62.473, backward_time=0.764, grad_norm=91.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.746e-05, train_time=7.909 +[gpua003:0/64] 2023-07-07 08:45:23,857 (trainer:732) INFO: 22epoch:train:8401-8500batch: iter_time=9.852e-05, forward_time=0.109, loss_ctc=64.243, loss_att=47.232, acc=0.693, loss=52.335, backward_time=0.754, grad_norm=77.637, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.744e-05, train_time=2.024 +[gpua003:0/64] 2023-07-07 08:47:04,339 (trainer:732) INFO: 22epoch:train:8501-8600batch: iter_time=1.040e-04, forward_time=0.109, loss_ctc=74.869, loss_att=61.475, acc=0.714, loss=65.493, backward_time=0.753, grad_norm=93.438, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.742e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 08:48:43,954 (trainer:732) INFO: 22epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=73.122, loss_att=61.953, acc=0.688, loss=65.304, backward_time=0.751, grad_norm=96.667, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.740e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 08:50:26,881 (trainer:732) INFO: 22epoch:train:8701-8800batch: iter_time=1.126e-04, forward_time=0.108, loss_ctc=66.145, loss_att=55.031, acc=0.694, loss=58.365, backward_time=0.754, grad_norm=94.273, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.738e-05, train_time=2.058 +[gpua003:0/64] 2023-07-07 08:52:06,631 (trainer:732) INFO: 22epoch:train:8801-8900batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=66.295, loss_att=51.963, acc=0.699, loss=56.262, backward_time=0.751, grad_norm=94.275, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.736e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 08:53:46,418 (trainer:732) INFO: 22epoch:train:8901-9000batch: iter_time=9.829e-05, forward_time=0.108, loss_ctc=61.909, loss_att=43.782, acc=0.697, 
loss=49.220, backward_time=0.752, grad_norm=82.673, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.734e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 08:55:26,132 (trainer:732) INFO: 22epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=71.915, loss_att=53.753, acc=0.707, loss=59.202, backward_time=0.751, grad_norm=109.132, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.733e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 08:56:34,455 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-07 08:56:53,892 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-07 08:56:57,553 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 09:00:31,061 (trainer:732) INFO: 22epoch:train:9101-9200batch: iter_time=1.431, forward_time=0.127, loss_ctc=71.039, loss_att=54.915, acc=0.705, loss=59.752, backward_time=0.761, grad_norm=106.663, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.731e-05, train_time=6.098 +[gpua003:0/64] 2023-07-07 09:02:14,178 (trainer:732) INFO: 22epoch:train:9201-9300batch: iter_time=1.042e-04, forward_time=0.122, loss_ctc=72.597, loss_att=52.275, acc=0.703, loss=58.372, backward_time=0.760, grad_norm=105.215, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.729e-05, train_time=2.062 +[gpua003:0/64] 2023-07-07 09:03:55,492 (trainer:732) INFO: 22epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.111, loss_ctc=66.483, loss_att=52.418, acc=0.717, loss=56.638, backward_time=0.755, grad_norm=102.064, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.727e-05, train_time=2.026 +[gpua003:0/64] 2023-07-07 09:05:35,862 (trainer:732) INFO: 22epoch:train:9401-9500batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=75.311, loss_att=58.987, acc=0.722, loss=63.884, backward_time=0.752, grad_norm=84.201, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.725e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 09:07:16,074 (trainer:732) INFO: 22epoch:train:9501-9600batch: iter_time=9.991e-05, forward_time=0.111, loss_ctc=71.239, loss_att=61.029, acc=0.693, loss=64.092, backward_time=0.754, grad_norm=112.479, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.723e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 09:08:55,956 (trainer:732) INFO: 22epoch:train:9601-9700batch: iter_time=1.011e-04, forward_time=0.111, loss_ctc=69.983, loss_att=56.365, acc=0.703, loss=60.450, backward_time=0.753, grad_norm=101.191, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.722e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 09:10:39,207 
(trainer:732) INFO: 22epoch:train:9701-9800batch: iter_time=1.137e-04, forward_time=0.129, loss_ctc=55.743, loss_att=43.049, acc=0.703, loss=46.857, backward_time=0.756, grad_norm=90.349, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.124, optim0_lr0=7.720e-05, train_time=2.065 +[gpua003:0/64] 2023-07-07 09:12:19,264 (trainer:732) INFO: 22epoch:train:9801-9900batch: iter_time=9.062e-05, forward_time=0.110, loss_ctc=69.518, loss_att=49.134, acc=0.711, loss=55.249, backward_time=0.753, grad_norm=100.063, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.718e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 09:14:01,810 (trainer:732) INFO: 22epoch:train:9901-10000batch: iter_time=9.648e-05, forward_time=0.129, loss_ctc=69.398, loss_att=49.229, acc=0.721, loss=55.280, backward_time=0.756, grad_norm=90.245, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.716e-05, train_time=2.051 +[gpua003:0/64] 2023-07-07 09:26:51,892 (trainer:338) INFO: 22epoch results: [train] iter_time=0.222, forward_time=0.112, loss_ctc=70.052, loss_att=54.212, acc=0.701, loss=58.964, backward_time=0.757, grad_norm=96.262, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.809e-05, train_time=2.580, time=3 hours, 35 minutes and 15.27 seconds, total_count=190000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=53.056, cer_ctc=0.291, loss_att=42.969, acc=0.658, cer=0.388, wer=0.991, loss=45.995, time=6 minutes and 5.25 seconds, total_count=19734, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 31.11 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-07 09:27:11,470 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-07 09:27:11,478 (trainer:272) INFO: 23/100epoch started. Estimated time to finish: 1 week, 5 days and 13 hours +[gpua003:0/64] 2023-07-07 09:27:12,495 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
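The "Building Nth iter-factory..." records before and after this point come from the multiple-iterator training mode: the training set was pre-split into 12 shards (splits12/split.1 ... split.12), and each epoch walks the shards one at a time, rebuilding the dataset and an UnsortedBatchSampler (batch_size=128, N-batch=37994) for the current shard only, so that only one shard's key list needs to be resident at a time. A minimal sketch of that shard-cycling pattern, with illustrative names throughout (this is not ESPnet's actual implementation):

    import random

    def multiple_iter_factory(split_ids, epoch, batch_size=128):
        # Shuffle the shard order deterministically for this epoch, then
        # stream fixed-size mini-batches from one shard at a time.
        order = list(split_ids)
        random.Random(epoch).shuffle(order)
        for split_id in order:
            # Stand-in for reading a speech_shape/split.N key file.
            keys = [f"utt-{split_id}-{i}" for i in range(1024)]
            for start in range(0, len(keys), batch_size):
                yield split_id, keys[start:start + batch_size]

    # Example: epoch 23 cycling over 12 shards, as in the records here.
    for split_id, batch in multiple_iter_factory(range(1, 13), epoch=23):
        pass  # each batch is one list of up to 128 utterance ids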
+[gpua003:0/64] 2023-07-07 09:27:32,952 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 09:27:36,862 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 09:27:36,865 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 09:27:36,955 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 09:34:04,979 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=3.070, forward_time=0.136, loss_ctc=73.179, loss_att=57.756, acc=0.704, loss=62.383, backward_time=0.766, grad_norm=99.480, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.714e-05, train_time=8.259 +[gpua003:0/64] 2023-07-07 09:35:47,590 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=9.809e-05, forward_time=0.110, loss_ctc=65.447, loss_att=55.548, acc=0.684, loss=58.518, backward_time=0.757, grad_norm=105.042, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.712e-05, train_time=2.052 +[gpua003:0/64] 2023-07-07 09:37:44,269 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=2.992e-04, forward_time=0.200, loss_ctc=91.477, loss_att=64.962, acc=0.703, loss=72.917, backward_time=0.765, grad_norm=137.116, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.130, optim0_lr0=7.711e-05, train_time=2.331 +[gpua003:0/64] 2023-07-07 09:39:30,373 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=2.111e-04, forward_time=0.143, loss_ctc=74.075, loss_att=60.821, acc=0.698, loss=64.797, backward_time=0.764, grad_norm=100.292, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.118, optim0_lr0=7.709e-05, train_time=2.124 +[gpua003:0/64] 2023-07-07 09:41:13,338 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=9.774e-05, forward_time=0.108, loss_ctc=78.934, loss_att=61.524, acc=0.709, loss=66.747, backward_time=0.755, grad_norm=111.165, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.707e-05, train_time=2.059 +[gpua003:0/64] 2023-07-07 09:42:53,514 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=9.807e-05, forward_time=0.108, loss_ctc=69.498, loss_att=53.920, acc=0.698, loss=58.594, backward_time=0.752, grad_norm=97.973, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.705e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 09:44:41,445 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=8.987e-05, forward_time=0.108, loss_ctc=83.563, loss_att=61.774, acc=0.691, loss=68.311, backward_time=0.762, grad_norm=122.324, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.703e-05, train_time=2.158 +[gpua003:0/64] 2023-07-07 09:46:26,252 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=9.492e-05, forward_time=0.109, loss_ctc=74.540, loss_att=56.661, acc=0.697, loss=62.025, backward_time=0.755, grad_norm=102.834, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.701e-05, 
train_time=2.096 +[gpua003:0/64] 2023-07-07 09:47:10,710 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-07 09:47:30,186 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 09:47:34,091 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 09:47:34,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-07 09:47:34,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 09:53:24,106 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=2.968, forward_time=0.137, loss_ctc=71.883, loss_att=53.675, acc=0.701, loss=59.138, backward_time=0.769, grad_norm=96.823, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.700e-05, train_time=8.356 +[gpua003:0/64] 2023-07-07 09:55:05,103 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.047e-04, forward_time=0.110, loss_ctc=64.340, loss_att=51.496, acc=0.693, loss=55.349, backward_time=0.754, grad_norm=93.049, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.698e-05, train_time=2.020 +[gpua003:0/64] 2023-07-07 09:56:45,333 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=9.897e-05, forward_time=0.109, loss_ctc=82.590, loss_att=64.984, acc=0.698, loss=70.266, backward_time=0.751, grad_norm=115.985, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.696e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 09:58:25,170 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=9.800e-05, forward_time=0.109, loss_ctc=74.904, loss_att=57.611, acc=0.704, loss=62.799, backward_time=0.753, grad_norm=93.699, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.694e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 10:00:05,128 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.040e-04, forward_time=0.110, loss_ctc=77.700, loss_att=63.938, acc=0.709, loss=68.066, backward_time=0.753, grad_norm=120.473, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.692e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 10:01:45,015 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.311e-04, forward_time=0.110, loss_ctc=66.958, loss_att=50.399, acc=0.716, loss=55.367, backward_time=0.753, grad_norm=104.203, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.690e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 10:03:25,092 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.131e-04, forward_time=0.111, loss_ctc=79.489, loss_att=60.622, acc=0.688, loss=66.282, backward_time=0.753, grad_norm=110.078, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.689e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 10:05:04,751 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=9.681e-05, forward_time=0.109, loss_ctc=69.851, loss_att=56.849, acc=0.699, 
loss=60.749, backward_time=0.751, grad_norm=108.192, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.687e-05, train_time=1.993 +[gpua003:0/64] 2023-07-07 10:06:13,927 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-07 10:06:33,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 10:06:36,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 10:10:24,633 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=1.378, forward_time=0.109, loss_ctc=70.081, loss_att=51.925, acc=0.700, loss=57.372, backward_time=0.765, grad_norm=91.494, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.685e-05, train_time=6.397 +[gpua003:0/64] 2023-07-07 10:12:05,102 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=70.081, loss_att=57.940, acc=0.707, loss=61.582, backward_time=0.756, grad_norm=100.076, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.683e-05, train_time=2.009 +[gpua003:0/64] 2023-07-07 10:13:45,172 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.097e-04, forward_time=0.108, loss_ctc=72.888, loss_att=55.204, acc=0.697, loss=60.509, backward_time=0.752, grad_norm=114.953, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.681e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 10:15:25,082 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=9.785e-05, forward_time=0.109, loss_ctc=83.550, loss_att=66.001, acc=0.695, loss=71.265, backward_time=0.753, grad_norm=99.143, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.680e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 10:17:04,949 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=9.886e-05, forward_time=0.110, loss_ctc=72.138, loss_att=59.700, acc=0.709, loss=63.431, backward_time=0.752, grad_norm=90.082, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.678e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 10:18:44,752 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=9.873e-05, forward_time=0.108, loss_ctc=71.072, loss_att=54.294, acc=0.707, loss=59.327, backward_time=0.752, grad_norm=109.276, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.676e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 10:20:24,385 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=9.689e-05, forward_time=0.107, loss_ctc=78.773, loss_att=58.998, acc=0.697, loss=64.930, backward_time=0.751, grad_norm=108.006, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.674e-05, train_time=1.992 +[gpua003:0/64] 2023-07-07 10:22:04,941 (trainer:732) 
INFO: 23epoch:train:2301-2400batch: iter_time=9.118e-05, forward_time=0.108, loss_ctc=74.926, loss_att=57.996, acc=0.700, loss=63.075, backward_time=0.752, grad_norm=123.642, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.672e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 10:23:49,235 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=9.225e-05, forward_time=0.107, loss_ctc=64.708, loss_att=53.380, acc=0.695, loss=56.779, backward_time=0.763, grad_norm=88.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.670e-05, train_time=2.086 +[gpua003:0/64] 2023-07-07 10:23:52,704 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-07 10:24:12,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-07 10:24:15,680 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 10:29:09,460 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.283, forward_time=0.109, loss_ctc=73.117, loss_att=58.099, acc=0.706, loss=62.605, backward_time=0.766, grad_norm=90.498, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.669e-05, train_time=6.404 +[gpua003:0/64] 2023-07-07 10:30:49,559 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=8.534e-05, forward_time=0.109, loss_ctc=64.315, loss_att=53.246, acc=0.691, loss=56.567, backward_time=0.752, grad_norm=89.794, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.667e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 10:32:29,624 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=8.962e-05, forward_time=0.109, loss_ctc=85.891, loss_att=63.051, acc=0.705, loss=69.903, backward_time=0.754, grad_norm=94.837, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.665e-05, train_time=2.001 +[gpua003:0/64] 2023-07-07 10:34:09,982 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=9.237e-05, forward_time=0.109, loss_ctc=74.116, loss_att=59.956, acc=0.701, loss=64.204, backward_time=0.753, grad_norm=89.065, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.663e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 10:35:50,367 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=8.691e-05, forward_time=0.109, loss_ctc=79.211, loss_att=61.574, acc=0.710, loss=66.865, backward_time=0.753, grad_norm=102.506, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.661e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 10:37:31,411 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=9.464e-05, forward_time=0.109, loss_ctc=65.890, loss_att=51.091, acc=0.706, loss=55.530, backward_time=0.755, grad_norm=86.526, clip=100.000, 
loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.660e-05, train_time=2.021 +[gpua003:0/64] 2023-07-07 10:39:12,463 (trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=78.191, loss_att=58.913, acc=0.696, loss=64.697, backward_time=0.758, grad_norm=107.870, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.658e-05, train_time=2.021 +[gpua003:0/64] 2023-07-07 10:40:55,933 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.031e-04, forward_time=0.108, loss_ctc=70.556, loss_att=54.398, acc=0.709, loss=59.245, backward_time=0.758, grad_norm=88.177, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.656e-05, train_time=2.069 +[gpua003:0/64] 2023-07-07 10:41:31,292 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-07 10:41:50,605 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-07 10:41:54,293 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 10:45:45,530 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.299, forward_time=0.108, loss_ctc=67.705, loss_att=53.054, acc=0.697, loss=57.449, backward_time=0.769, grad_norm=90.741, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.654e-05, train_time=5.792 +[gpua003:0/64] 2023-07-07 10:47:25,708 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=9.948e-05, forward_time=0.108, loss_ctc=69.513, loss_att=54.398, acc=0.710, loss=58.932, backward_time=0.753, grad_norm=90.557, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.653e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 10:49:05,710 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=9.595e-05, forward_time=0.109, loss_ctc=71.035, loss_att=55.784, acc=0.705, loss=60.359, backward_time=0.754, grad_norm=89.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.651e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 10:51:01,341 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=9.044e-05, forward_time=0.119, loss_ctc=84.230, loss_att=63.766, acc=0.703, loss=69.905, backward_time=0.776, grad_norm=98.539, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.649e-05, train_time=2.312 +[gpua003:0/64] 2023-07-07 10:52:56,875 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=6.396e-04, forward_time=0.131, loss_ctc=73.263, loss_att=59.816, acc=0.713, loss=63.850, backward_time=0.792, grad_norm=100.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.647e-05, train_time=2.310 +[gpua003:0/64] 2023-07-07 10:54:36,698 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=8.669e-05, 
forward_time=0.110, loss_ctc=66.313, loss_att=49.529, acc=0.713, loss=54.564, backward_time=0.753, grad_norm=92.675, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.645e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 10:56:17,417 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=9.704e-05, forward_time=0.112, loss_ctc=77.178, loss_att=56.536, acc=0.708, loss=62.729, backward_time=0.754, grad_norm=103.834, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.644e-05, train_time=2.014 +[gpua003:0/64] 2023-07-07 10:57:57,769 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=9.868e-05, forward_time=0.110, loss_ctc=72.979, loss_att=57.846, acc=0.699, loss=62.386, backward_time=0.753, grad_norm=115.524, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.642e-05, train_time=2.007 +[gpua003:0/64] 2023-07-07 10:59:19,759 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-07 10:59:39,162 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-07 10:59:42,869 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 11:04:06,757 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=2.609, forward_time=0.130, loss_ctc=65.738, loss_att=48.149, acc=0.704, loss=53.426, backward_time=0.768, grad_norm=87.940, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.640e-05, train_time=7.379 +[gpua003:0/64] 2023-07-07 11:05:49,241 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.057e-04, forward_time=0.109, loss_ctc=68.915, loss_att=58.433, acc=0.703, loss=61.578, backward_time=0.757, grad_norm=94.575, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.638e-05, train_time=2.050 +[gpua003:0/64] 2023-07-07 11:07:30,444 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.040e-04, forward_time=0.111, loss_ctc=71.161, loss_att=54.889, acc=0.690, loss=59.771, backward_time=0.754, grad_norm=100.822, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.636e-05, train_time=2.024 +[gpua003:0/64] 2023-07-07 11:09:10,593 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.110, loss_ctc=82.303, loss_att=66.740, acc=0.686, loss=71.409, backward_time=0.755, grad_norm=94.738, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.635e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 11:10:50,539 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.110, loss_ctc=71.919, loss_att=59.048, acc=0.707, loss=62.910, backward_time=0.753, grad_norm=85.604, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.633e-05, 
train_time=1.999 +[gpua003:0/64] 2023-07-07 11:12:30,296 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.109e-04, forward_time=0.110, loss_ctc=70.547, loss_att=55.273, acc=0.703, loss=59.856, backward_time=0.752, grad_norm=92.197, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.631e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 11:14:10,064 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=9.679e-05, forward_time=0.109, loss_ctc=78.198, loss_att=57.584, acc=0.701, loss=63.768, backward_time=0.754, grad_norm=100.076, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.629e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 11:15:50,073 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=73.676, loss_att=58.368, acc=0.689, loss=62.960, backward_time=0.754, grad_norm=105.898, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.628e-05, train_time=2.000 +[gpua003:0/64] 2023-07-07 11:17:33,946 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=9.428e-05, forward_time=0.109, loss_ctc=64.333, loss_att=54.483, acc=0.690, loss=57.438, backward_time=0.757, grad_norm=94.071, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.626e-05, train_time=2.077 +[gpua003:0/64] 2023-07-07 11:17:39,457 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-07 11:17:59,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-07 11:18:02,635 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 11:23:09,216 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.440, forward_time=0.127, loss_ctc=71.836, loss_att=56.120, acc=0.706, loss=60.835, backward_time=0.765, grad_norm=102.820, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.624e-05, train_time=6.705 +[gpua003:0/64] 2023-07-07 11:24:49,430 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=9.982e-05, forward_time=0.108, loss_ctc=63.033, loss_att=52.578, acc=0.690, loss=55.715, backward_time=0.754, grad_norm=77.578, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.622e-05, train_time=2.004 +gpua087:2330954:2332476 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua055:3866105:3867680 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua087:2330955:2332481 [2] NCCL INFO comm 0x1091ecd0 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua031:1680702:1682220 [2] NCCL INFO comm 0x90042a50 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua055:3866104:3867675 [1] NCCL INFO comm 0x4ff24650 rank 37 
nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua028:3269322:3270845 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua025:63838:65355 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua060:2854969:2856486 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua025:63837:65357 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua060:2854970:2856496 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua003:350635:352158 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua003:350634:352156 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua028:3269323:3270853 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua053:959076:960598 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua090:2294099:2295633 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua029:1226922:1228446 [1] NCCL INFO comm 0x91446d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua029:1226923:1228448 [2] NCCL INFO comm 0x9682050 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua057:1814426:1815949 [1] NCCL INFO comm 0xb6887810 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua057:1814427:1815959 [2] NCCL INFO comm 0x8ff8bf0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua035:1685218:1686747 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua035:1685217:1686742 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua098:2101209:2102740 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua005:322786:324303 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua005:322787:324304 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua098:2101210:2102744 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua010:1622002:1623518 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua074:989793:991318 [2] NCCL INFO comm 0x50124340 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +Process SpawnProcess-2: +gpua090:2294098:2295630 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua031:1680701:1682217 [1] NCCL INFO comm 0xb74170b0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File 
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 45] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804271 milliseconds before timing out. +gpua053:959075:960591 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 21] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804411 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804302 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 14] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804276 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804307 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 53] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804259 milliseconds before timing out. 
+Process SpawnProcess-3: +Process SpawnProcess-3: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( +RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804368 milliseconds before timing out. + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 46] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804297 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 37] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804277 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804484 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 13] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804273 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 50] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804680 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 6] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804607 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 30] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804538 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 26] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804292 milliseconds before timing out. +gpua010:1622001:1623523 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua074:989792:991309 [1] NCCL INFO comm 0x91b8e50 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 5] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804559 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804434 milliseconds before timing out. +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 42] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804489 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): +Process SpawnProcess-3: + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804353 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) +RuntimeError: [Rank 34] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804440 milliseconds before timing out. 
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 58] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804403 milliseconds before timing out. +Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 61] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804595 milliseconds before timing out. 
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 62] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804654 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 10] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804678 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 22] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804467 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 38] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804274 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 54] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804266 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 25] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805539 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 33] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805578 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805585 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 9] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805928 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805925 milliseconds before timing out.
+gpua005:322788:324302 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua053:959077:960604 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 7] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1816754 milliseconds before timing out.
+gpua087:2330956:2332486 [3] NCCL INFO comm 0x4fa40250 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua025:63839:65363 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua029:1226924:1228445 [3] NCCL INFO comm 0x502a1280 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 55] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817525 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 35] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817402 milliseconds before timing out.
+gpua098:2101211:2102741 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua028:3269324:3270856 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+    scaler.scale(loss).backward()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+    torch.autograd.backward(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+    Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 15] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817790 milliseconds before timing out.
+gpua074:989794:991315 [3] NCCL INFO comm 0x51823d90 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpua005: task 1: Exited with exit code 1
+srun: error: gpua029: task 5: Exited with exit code 1
+srun: error: gpua025: task 3: Exited with exit code 1
+srun: error: gpua060: task 11: Exited with exit code 1
+srun: error: gpua035: task 7: Exited with exit code 1
+srun: error: gpua003: task 0: Exited with exit code 1
+srun: error: gpua055: task 9: Exited with exit code 1
+srun: error: gpua010: task 2: Exited with exit code 1
+srun: error: gpua087: task 13: Exited with exit code 1
+srun: error: gpua057: task 10: Exited with exit code 1
+srun: error: gpua031: task 6: Exited with exit code 1
+srun: error: gpua090: task 14: Exited with exit code 1
+srun: error: gpua053: task 8: Exited with exit code 1
+srun: error: gpua028: task 4: Exited with exit code 1
+srun: error: gpua098: task 15: Exited with exit code 1
+srun: error: gpua074: task 12: Exited with exit code 1
+# Accounting: begin_time=1688614643
+# Accounting: end_time=1688748923
+# Accounting: time=134280 threads=1
+# Finished at Fri Jul 7 11:55:23 CDT 2023 with status 1