diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log" @@ -0,0 +1,4111 @@ +# Running on gpub001.delta.ncsa.illinois.edu +# Started at Fri Jul 14 13:29:16 CDT 2023 +# SLURMD_NODENAME=gpub001 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2157595 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2157595 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=1052675 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config 
+[gpub001:0/64] 2023-07-14 13:30:20,482 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/64] 2023-07-14 13:30:21,930 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub001:0/64] 2023-07-14 13:30:21,964 (s2t:483) INFO: Vocabulary size: 50002
+[gpub001:0/64] 2023-07-14 13:30:35,251 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0-23): 24 x EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0-2): 3 x DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 888.51 M + Number of trainable parameters: 888.51 M (100.0%) + Size: 3.55 GB + Type: torch.float32 +[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1205) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.00025 + lr: 2.5e-08 + maximize: False + weight_decay: 0.0 +) +[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000) +[gpub001:0/64] 2023-07-14 13:30:35,277 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml +[gpub001:0/64] 2023-07-14 13:30:35,962 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth +[gpub001:0/64] 2023-07-14 13:30:44,311 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1570) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub001:0/64] 2023-07-14 13:30:44,463 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129 +[gpub001:0/64] 2023-07-14 13:30:44,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1570) INFO: [plot_att] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1571) INFO: [plot_att] Batch 
sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1 +[gpub001:0/64] 2023-07-14 13:31:11,236 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth +gpub001:1052798:1052798 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052798:1052798 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052798:1052798 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.14.3+cuda11.7 +[gpub001:0/64] 2023-07-14 13:31:16,544 (trainer:284) INFO: 49/60epoch started +[gpub001:0/64] 2023-07-14 13:31:16,605 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub001:0/64] 2023-07-14 13:31:34,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-14 13:31:37,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +gpub050:2739708:2739708 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:2739708:2739708 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739708:2739708 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739708:2739778 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739708:2739778 [3] NCCL INFO Using network IB +gpub050:2739708:2739778 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:2739708:2739778 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub050:2739708:2739778 [3] NCCL INFO Connected all rings +gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub050:2739708:2739778 [3] NCCL INFO Connected all trees +gpub050:2739708:2739778 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739708:2739778 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739708:2739778 [3] NCCL INFO comm 0x51443e00 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:2739706:2739706 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:2739706:2739706 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739706:2739706 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739706:2739779 [1] NCCL INFO NET/IB : Using 
[0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739706:2739779 [1] NCCL INFO Using network IB +gpub050:2739706:2739779 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:2739706:2739779 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Connected all rings +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Connected all trees +gpub050:2739706:2739779 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739706:2739779 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739706:2739779 [1] NCCL INFO comm 0xb91afa10 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub050:2739707:2739707 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:2739707:2739707 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739707:2739707 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739707:2739777 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739707:2739777 [2] NCCL INFO Using network IB +gpub050:2739707:2739777 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:2739707:2739777 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub050:2739707:2739777 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Connected all rings +gpub050:2739707:2739777 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Connected all trees +gpub050:2739707:2739777 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739707:2739777 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739707:2739777 [2] NCCL INFO comm 0x9490430 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub050:2739705:2739705 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2739705:2739705 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739705:2739705 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739705:2739780 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739705:2739780 [0] NCCL INFO Using network IB +gpub050:2739705:2739780 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2739705:2739780 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC 
+gpub050:2739705:2739780 [0] NCCL INFO Connected all rings +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Connected all trees +gpub050:2739705:2739780 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739705:2739780 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739705:2739780 [0] NCCL INFO comm 0xb798f2d0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:587387:587387 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:587387:587387 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587387:587387 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587387:587522 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587387:587522 [1] NCCL INFO Using network IB +gpub078:587387:587522 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:587387:587522 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Connected all rings +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Connected all trees +gpub078:587387:587522 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587387:587522 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587387:587522 [1] NCCL INFO comm 0x8bcb3990 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:587386:587386 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:587386:587386 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587386:587386 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587386:587521 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587386:587521 [0] NCCL INFO Using network IB +gpub078:587386:587521 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:587386:587521 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:587386:587521 [0] NCCL INFO Connected all rings +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 
: 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Connected all trees +gpub078:587386:587521 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587386:587521 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587386:587521 [0] NCCL INFO comm 0xb210a550 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:587389:587389 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:587389:587389 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587389:587389 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587389:587520 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587389:587520 [3] NCCL INFO Using network IB +gpub078:587389:587520 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:587389:587520 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub078:587389:587520 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:587389:587520 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:587389:587520 [3] NCCL INFO Connected all rings +gpub078:587389:587520 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:587389:587520 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:587389:587520 [3] NCCL INFO Connected all trees +gpub078:587389:587520 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587389:587520 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587389:587520 [3] NCCL INFO comm 0xf3a7e40 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub002:2311584:2311584 [0] NCCL INFO cudaDriverVersion 12010 +gpub002:2311584:2311584 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2311584:2311584 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311584:2311667 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311584:2311667 [0] NCCL INFO Using network IB +gpub002:2311584:2311667 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub002:2311584:2311667 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub002:2311584:2311667 [0] NCCL INFO Connected all rings +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 12[7000] -> 
4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Connected all trees +gpub002:2311584:2311667 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311584:2311667 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311584:2311667 [0] NCCL INFO comm 0x9d597d00 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub002:2311585:2311585 [1] NCCL INFO cudaDriverVersion 12010 +gpub002:2311585:2311585 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2311585:2311585 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311585:2311664 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311585:2311664 [1] NCCL INFO Using network IB +gpub002:2311585:2311664 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub002:2311585:2311664 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub002:2311585:2311664 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Connected all rings +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub002:2311585:2311664 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Connected all trees +gpub002:2311585:2311664 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311585:2311664 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311585:2311664 [1] NCCL INFO comm 0x9cc6bd40 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2311586:2311586 [2] NCCL INFO cudaDriverVersion 12010 +gpub002:2311586:2311586 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2311586:2311586 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311586:2311666 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311586:2311666 [2] NCCL INFO Using network IB +gpub002:2311586:2311666 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub002:2311586:2311666 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub002:2311586:2311666 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Connected all rings +gpub002:2311586:2311666 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Connected all trees +gpub002:2311586:2311666 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311586:2311666 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311586:2311666 [2] NCCL INFO comm 0x8ee58800 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub002:2311587:2311587 [3] NCCL INFO cudaDriverVersion 12010 +gpub002:2311587:2311587 [3] NCCL INFO Bootstrap : Using 
eth1:172.28.23.102<0> +gpub002:2311587:2311587 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311587:2311665 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311587:2311665 [3] NCCL INFO Using network IB +gpub002:2311587:2311665 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub002:2311587:2311665 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub002:2311587:2311665 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub002:2311587:2311665 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub002:2311587:2311665 [3] NCCL INFO Connected all rings +gpub002:2311587:2311665 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub002:2311587:2311665 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub002:2311587:2311665 [3] NCCL INFO Connected all trees +gpub002:2311587:2311665 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311587:2311665 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311587:2311665 [3] NCCL INFO comm 0x4fabf1f0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:587388:587388 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:587388:587388 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587388:587388 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587388:587523 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587388:587523 [2] NCCL INFO Using network IB +gpub078:587388:587523 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:587388:587523 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub078:587388:587523 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Connected all rings +gpub078:587388:587523 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Connected all trees +gpub078:587388:587523 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587388:587523 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587388:587523 [2] NCCL INFO comm 0x4f14aa50 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3424421:3424421 [3] NCCL INFO cudaDriverVersion 12010 +gpub051:3424421:3424421 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424421:3424421 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424421:3424552 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424421:3424552 [3] NCCL INFO Using network IB +gpub051:3424421:3424552 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:3424421:3424552 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub051:3424421:3424552 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub051:3424421:3424552 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub051:3424421:3424552 [3] NCCL INFO Connected all rings +gpub051:3424421:3424552 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub051:3424421:3424552 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC 
+gpub051:3424421:3424552 [3] NCCL INFO Connected all trees +gpub051:3424421:3424552 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424421:3424552 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424421:3424552 [3] NCCL INFO comm 0x9ee4290 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:3424419:3424419 [1] NCCL INFO cudaDriverVersion 12010 +gpub051:3424419:3424419 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424419:3424419 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424419:3424553 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424419:3424553 [1] NCCL INFO Using network IB +gpub051:3424419:3424553 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub051:3424419:3424553 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub051:3424419:3424553 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Connected all rings +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub051:3424419:3424553 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Connected all trees +gpub051:3424419:3424553 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424419:3424553 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424419:3424553 [1] NCCL INFO comm 0xb60902d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub051:3424418:3424418 [0] NCCL INFO cudaDriverVersion 12010 +gpub051:3424418:3424418 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424418:3424418 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424418:3424555 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424418:3424555 [0] NCCL INFO Using network IB +gpub051:3424418:3424555 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub051:3424418:3424555 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub051:3424418:3424555 [0] NCCL INFO Connected all rings +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Connected 
all trees +gpub051:3424418:3424555 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424418:3424555 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424418:3424555 [0] NCCL INFO comm 0x8ebe3540 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub051:3424420:3424420 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:3424420:3424420 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424420:3424420 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424420:3424554 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424420:3424554 [2] NCCL INFO Using network IB +gpub051:3424420:3424554 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:3424420:3424554 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub051:3424420:3424554 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Connected all rings +gpub051:3424420:3424554 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Connected all trees +gpub051:3424420:3424554 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424420:3424554 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424420:3424554 [2] NCCL INFO comm 0x4f403790 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub027:4034780:4034780 [1] NCCL INFO cudaDriverVersion 12010 +gpub027:4034780:4034780 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034780:4034780 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034780:4034863 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034780:4034863 [1] NCCL INFO Using network IB +gpub027:4034780:4034863 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub027:4034780:4034863 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub027:4034780:4034863 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Connected all rings +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub027:4034780:4034863 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Connected all trees +gpub027:4034780:4034863 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034780:4034863 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034780:4034863 [1] NCCL INFO comm 0x9afc8490 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub027:4034779:4034779 [0] NCCL INFO cudaDriverVersion 12010 +gpub027:4034779:4034779 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034779:4034779 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034779:4034862 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB 
eth1:172.28.23.127<0> +gpub027:4034779:4034862 [0] NCCL INFO Using network IB +gpub027:4034779:4034862 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:4034779:4034862 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Connected all rings +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Connected all trees +gpub027:4034779:4034862 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034779:4034862 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034779:4034862 [0] NCCL INFO comm 0xb5e6b1f0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:4034782:4034782 [3] NCCL INFO cudaDriverVersion 12010 +gpub027:4034782:4034782 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034782:4034782 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034782:4034861 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034782:4034861 [3] NCCL INFO Using network IB +gpub027:4034782:4034861 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:4034782:4034861 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Connected all rings +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Connected all trees +gpub027:4034782:4034861 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034782:4034861 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034782:4034861 [3] NCCL INFO comm 0x4f996350 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:4034781:4034781 [2] NCCL INFO cudaDriverVersion 12010 +gpub027:4034781:4034781 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034781:4034781 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034781:4034864 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034781:4034864 [2] NCCL INFO Using network IB +gpub027:4034781:4034864 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:4034781:4034864 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 
31/-1/-1->30->29 +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all rings +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all trees +gpub027:4034781:4034864 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034781:4034864 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034781:4034864 [2] NCCL INFO comm 0x8d940630 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub077:2521721:2521721 [3] NCCL INFO cudaDriverVersion 12010 +gpub077:2521721:2521721 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521721:2521721 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521721:2521790 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521721:2521790 [3] NCCL INFO Using network IB +gpub077:2521721:2521790 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub077:2521721:2521790 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Connected all rings +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Connected all trees +gpub077:2521721:2521790 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521721:2521790 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521721:2521790 [3] NCCL INFO comm 0x500bb780 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub079:3396495:3396495 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:3396495:3396495 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396495:3396495 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396495:3396576 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396495:3396576 [2] NCCL INFO Using network IB +gpub079:3396495:3396576 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:3396495:3396576 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all rings +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all trees +gpub079:3396495:3396576 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396495:3396576 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396495:3396576 [2] NCCL INFO comm 0x8e939be0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859755:1859755 [3] NCCL INFO cudaDriverVersion 12010 
+gpub006:1859755:1859755 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859755:1859755 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859755:1859833 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859755:1859833 [3] NCCL INFO Using network IB +gpub006:1859755:1859833 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub006:1859755:1859833 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Connected all rings +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Connected all trees +gpub006:1859755:1859833 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859755:1859833 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859755:1859833 [3] NCCL INFO comm 0x50847890 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277663:277663 [1] NCCL INFO cudaDriverVersion 12010 +gpub049:277663:277663 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277663:277663 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277663:277744 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277663:277744 [1] NCCL INFO Using network IB +gpub049:277663:277744 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub049:277663:277744 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all rings +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all trees +gpub049:277663:277744 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277663:277744 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277663:277744 [1] NCCL INFO comm 0xb77e62d0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396496:3396496 [3] NCCL INFO cudaDriverVersion 12010 +gpub079:3396496:3396496 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396496:3396496 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396496:3396574 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396496:3396574 [3] NCCL INFO Using network IB +gpub079:3396496:3396574 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:3396496:3396574 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] 
[send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Connected all rings +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Connected all trees +gpub079:3396496:3396574 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396496:3396574 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396496:3396574 [3] NCCL INFO comm 0x51317510 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:4055358:4055358 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:4055358:4055358 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055358:4055358 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055358:4055429 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055358:4055429 [2] NCCL INFO Using network IB +gpub074:4055358:4055429 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:4055358:4055429 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all rings +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub049:277665:277665 [3] NCCL INFO cudaDriverVersion 12010 +gpub049:277665:277665 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277665:277665 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277665:277743 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277665:277743 [3] NCCL INFO Using network IB +gpub049:277665:277743 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub049:277665:277743 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Connected all rings +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all trees +gpub074:4055358:4055429 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055358:4055429 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055358:4055429 [2] NCCL INFO comm 0x5076e6a0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277665:277743 [3] NCCL INFO Connected all trees +gpub049:277665:277743 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277665:277743 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277665:277743 [3] NCCL INFO comm 0x9d1a0c70 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277662:277662 [0] NCCL INFO cudaDriverVersion 12010 +gpub049:277662:277662 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277662:277662 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub049:277662:277745 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277662:277745 [0] NCCL INFO Using network IB +gpub049:277662:277745 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub049:277662:277745 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Connected all rings +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Connected all trees +gpub049:277662:277745 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277662:277745 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277662:277745 [0] NCCL INFO comm 0x50033560 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:4055359:4055359 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:4055359:4055359 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055359:4055359 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055359:4055428 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055359:4055428 [3] NCCL INFO Using network IB +gpub074:4055359:4055428 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:4055359:4055428 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Connected all rings +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Connected all trees +gpub074:4055359:4055428 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055359:4055428 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055359:4055428 [3] NCCL INFO comm 0xb59ce3d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521719:2521719 [1] NCCL INFO cudaDriverVersion 12010 +gpub077:2521719:2521719 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521719:2521719 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521719:2521792 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521719:2521792 [1] NCCL INFO Using network IB +gpub077:2521719:2521792 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub077:2521719:2521792 [1] NCCL 
INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all rings +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all trees +gpub077:2521719:2521792 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521719:2521792 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521719:2521792 [1] NCCL INFO comm 0xb802530 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396493:3396493 [0] NCCL INFO cudaDriverVersion 12010 +gpub079:3396493:3396493 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396493:3396493 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396493:3396573 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396493:3396573 [0] NCCL INFO Using network IB +gpub079:3396493:3396573 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub079:3396493:3396573 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Connected all rings +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Connected all trees +gpub079:3396493:3396573 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396493:3396573 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396493:3396573 [0] NCCL INFO comm 0x4f9d83d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:2781297:2781297 [1] NCCL INFO cudaDriverVersion 12010 +gpub026:2781297:2781297 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781297:2781297 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781297:2781382 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781297:2781382 [1] NCCL INFO Using network IB +gpub026:2781297:2781382 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub026:2781297:2781382 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC 
+gpub026:2781297:2781382 [1] NCCL INFO Connected all rings +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Connected all trees +gpub026:2781297:2781382 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781297:2781382 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781297:2781382 [1] NCCL INFO comm 0x50ca5540 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub006:1859754:1859754 [2] NCCL INFO cudaDriverVersion 12010 +gpub006:1859754:1859754 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859754:1859754 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859754:1859834 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859754:1859834 [2] NCCL INFO Using network IB +gpub006:1859754:1859834 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub006:1859754:1859834 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all rings +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all trees +gpub006:1859754:1859834 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859754:1859834 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859754:1859834 [2] NCCL INFO comm 0xa42aef0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055357:4055357 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:4055357:4055357 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055357:4055357 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055357:4055427 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055357:4055427 [1] NCCL INFO Using network IB +gpub074:4055357:4055427 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:4055357:4055427 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all rings +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all trees +gpub074:4055357:4055427 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055357:4055427 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels 
per peer +gpub074:4055357:4055427 [1] NCCL INFO comm 0x8b30550 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396494:3396494 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:3396494:3396494 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396494:3396494 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396494:3396575 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396494:3396575 [1] NCCL INFO Using network IB +gpub079:3396494:3396575 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:3396494:3396575 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all rings +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all trees +gpub079:3396494:3396575 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396494:3396575 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396494:3396575 [1] NCCL INFO comm 0xc163a90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:2781299:2781299 [3] NCCL INFO cudaDriverVersion 12010 +gpub026:2781299:2781299 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781299:2781299 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781299:2781379 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781299:2781379 [3] NCCL INFO Using network IB +gpub026:2781299:2781379 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:2781299:2781379 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Connected all rings +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Connected all trees +gpub026:2781299:2781379 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781299:2781379 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781299:2781379 [3] NCCL INFO comm 0x507c61a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521720:2521720 [2] NCCL INFO cudaDriverVersion 12010 +gpub077:2521720:2521720 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521720:2521720 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521720:2521791 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521720:2521791 [2] NCCL INFO Using network IB +gpub077:2521720:2521791 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub077:2521720:2521791 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 
54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all rings +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all trees +gpub077:2521720:2521791 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521720:2521791 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521720:2521791 [2] NCCL INFO comm 0xa4c559d0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859752:1859752 [0] NCCL INFO cudaDriverVersion 12010 +gpub006:1859752:1859752 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859752:1859752 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859752:1859836 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859752:1859836 [0] NCCL INFO Using network IB +gpub006:1859752:1859836 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub006:1859752:1859836 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Connected all rings +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Connected all trees +gpub006:1859752:1859836 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859752:1859836 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859752:1859836 [0] NCCL INFO comm 0x50278a40 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990364:2990364 [0] NCCL INFO cudaDriverVersion 12010 +gpub008:2990364:2990364 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990364:2990364 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990364:2990440 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990364:2990440 [0] NCCL INFO Using network IB +gpub008:2990364:2990440 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:2990364:2990440 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] 
NCCL INFO Connected all rings +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Connected all trees +gpub008:2990364:2990440 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990364:2990440 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990364:2990440 [0] NCCL INFO comm 0xa229210 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub077:2521718:2521718 [0] NCCL INFO cudaDriverVersion 12010 +gpub077:2521718:2521718 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521718:2521718 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521718:2521789 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521718:2521789 [0] NCCL INFO Using network IB +gpub077:2521718:2521789 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub077:2521718:2521789 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Connected all rings +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Connected all trees +gpub077:2521718:2521789 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521718:2521789 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521718:2521789 [0] NCCL INFO comm 0x5162ad90 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990366:2990366 [2] NCCL INFO cudaDriverVersion 12010 +gpub008:2990366:2990366 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990366:2990366 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990366:2990443 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990366:2990443 [2] NCCL INFO Using network IB +gpub008:2990366:2990443 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:2990366:2990443 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 
14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all rings +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all trees +gpub008:2990366:2990443 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990366:2990443 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990366:2990443 [2] NCCL INFO comm 0xb6d2c880 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055356:4055356 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:4055356:4055356 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055356:4055356 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055356:4055430 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055356:4055430 [0] NCCL INFO Using network IB +gpub074:4055356:4055430 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:4055356:4055430 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Connected all rings +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Connected all trees +gpub074:4055356:4055430 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055356:4055430 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055356:4055430 [0] NCCL INFO comm 0x9c0ae50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub006:1859753:1859753 [1] NCCL INFO cudaDriverVersion 12010 +gpub006:1859753:1859753 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859753:1859753 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859753:1859835 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859753:1859835 [1] NCCL INFO Using network IB +gpub006:1859753:1859835 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub006:1859753:1859835 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all rings +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 4[7000] -> 
9[46000] [receive] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all trees +gpub006:1859753:1859835 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859753:1859835 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859753:1859835 [1] NCCL INFO comm 0xa3eb5f0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052799:1052799 [1] NCCL INFO cudaDriverVersion 12010 +gpub001:1052799:1052799 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052799:1052799 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052799:1052880 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052799:1052880 [1] NCCL INFO Using network IB +gpub001:1052799:1052880 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub001:1052799:1052880 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all rings +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all trees +gpub001:1052799:1052880 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052799:1052880 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052799:1052880 [1] NCCL INFO comm 0x50befe70 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052801:1052801 [3] NCCL INFO cudaDriverVersion 12010 +gpub001:1052801:1052801 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052801:1052801 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052801:1052879 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052801:1052879 [3] NCCL INFO Using network IB +gpub001:1052801:1052879 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub001:1052801:1052879 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Connected all rings +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Connected all trees +gpub001:1052801:1052879 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052801:1052879 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052801:1052879 [3] NCCL INFO comm 0xb78dc020 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781298:2781298 [2] NCCL INFO cudaDriverVersion 12010 +gpub026:2781298:2781298 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781298:2781298 [2] NCCL INFO NET/Plugin : No plugin found 
(libnccl-net.so), using internal implementation +gpub026:2781298:2781380 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781298:2781380 [2] NCCL INFO Using network IB +gpub026:2781298:2781380 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:2781298:2781380 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Connected all rings +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:277664:277664 [2] NCCL INFO cudaDriverVersion 12010 +gpub049:277664:277664 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277664:277664 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277664:277742 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277664:277742 [2] NCCL INFO Using network IB +gpub049:277664:277742 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub049:277664:277742 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all rings +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781298:2781380 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781298:2781380 [2] NCCL INFO comm 0x8e36d550 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277664:277742 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277664:277742 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277664:277742 [2] NCCL INFO comm 0x92096c0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub008:2990367:2990367 [3] NCCL INFO cudaDriverVersion 12010 +gpub008:2990367:2990367 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990367:2990367 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990367:2990442 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990367:2990442 [3] NCCL INFO Using network IB +gpub008:2990367:2990442 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:2990367:2990442 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Connected all rings +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Connected all trees +gpub008:2990367:2990442 [3] NCCL 
INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990367:2990442 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990367:2990442 [3] NCCL INFO comm 0x4fa470f0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub001:1052798:1052882 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052798:1052882 [0] NCCL INFO Using network IB +gpub001:1052798:1052882 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Connected all rings +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Connected all trees +gpub001:1052798:1052882 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052798:1052882 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052798:1052882 [0] NCCL INFO comm 0x50dde690 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106667:106667 [1] NCCL INFO cudaDriverVersion 12010 +gpub022:106667:106667 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106667:106667 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106667:106747 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106667:106747 [1] NCCL INFO Using network IB +gpub022:106667:106747 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:106667:106747 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all rings +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all trees +gpub022:106667:106747 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106667:106747 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106667:106747 [1] NCCL INFO comm 0x8f3e3330 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2990365:2990365 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2990365:2990365 [1] 
NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990365:2990365 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990365:2990441 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990365:2990441 [1] NCCL INFO Using network IB +gpub008:2990365:2990441 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2990365:2990441 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all rings +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all trees +gpub008:2990365:2990441 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990365:2990441 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990365:2990441 [1] NCCL INFO comm 0x98eccf0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541987:541987 [2] NCCL INFO cudaDriverVersion 12010 +gpub024:541987:541987 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541987:541987 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541987:542066 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541987:542066 [2] NCCL INFO Using network IB +gpub024:541987:542066 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:541987:542066 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all rings +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all trees +gpub024:541987:542066 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541987:542066 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541987:542066 [2] NCCL INFO comm 0x505ff970 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:1052800:1052800 [2] NCCL INFO cudaDriverVersion 12010 +gpub001:1052800:1052800 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052800:1052800 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052800:1052881 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052800:1052881 [2] NCCL INFO Using network IB +gpub001:1052800:1052881 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub001:1052800:1052881 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL 
INFO Connected all rings +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Connected all trees +gpub001:1052800:1052881 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052800:1052881 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052800:1052881 [2] NCCL INFO comm 0x8e66c510 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub022:106669:106669 [3] NCCL INFO cudaDriverVersion 12010 +gpub022:106669:106669 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106669:106669 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106669:106748 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106669:106748 [3] NCCL INFO Using network IB +gpub022:106669:106748 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:106669:106748 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Connected all rings +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Connected all trees +gpub022:106669:106748 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106669:106748 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106669:106748 [3] NCCL INFO comm 0x4f1d7190 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781296:2781296 [0] NCCL INFO cudaDriverVersion 12010 +gpub026:2781296:2781296 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781296:2781296 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781296:2781381 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781296:2781381 [0] NCCL INFO Using network IB +gpub026:2781296:2781381 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:2781296:2781381 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Connected all rings +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Connected all trees 
+gpub026:2781296:2781381 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781296:2781381 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781296:2781381 [0] NCCL INFO comm 0xaebd9cd0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:541985:541985 [0] NCCL INFO cudaDriverVersion 12010 +gpub024:541985:541985 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541985:541985 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541985:542068 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541985:542068 [0] NCCL INFO Using network IB +gpub024:541985:542068 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:541985:542068 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Connected all rings +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Connected all trees +gpub024:541985:542068 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541985:542068 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541985:542068 [0] NCCL INFO comm 0x4ffe64c0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106668:106668 [2] NCCL INFO cudaDriverVersion 12010 +gpub022:106668:106668 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106668:106668 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106668:106749 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106668:106749 [2] NCCL INFO Using network IB +gpub022:106668:106749 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:106668:106749 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all rings +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all trees +gpub022:106668:106749 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106668:106749 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106668:106749 [2] NCCL INFO comm 0x4fd27690 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:541988:541988 [3] NCCL 
INFO cudaDriverVersion 12010 +gpub024:541988:541988 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541988:541988 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541988:542067 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541988:542067 [3] NCCL INFO Using network IB +gpub024:541988:542067 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub024:541988:542067 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Connected all rings +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Connected all trees +gpub024:541988:542067 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541988:542067 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541988:542067 [3] NCCL INFO comm 0xb65fe8d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933786:3933786 [3] NCCL INFO cudaDriverVersion 12010 +gpub048:3933786:3933786 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933786:3933786 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933786:3933849 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933786:3933849 [3] NCCL INFO Using network IB +gpub048:3933786:3933849 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub048:3933786:3933849 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Connected all rings +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Connected all trees +gpub048:3933786:3933849 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933786:3933849 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933786:3933849 [3] NCCL INFO comm 0x8e08e110 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933785:3933785 [2] NCCL INFO cudaDriverVersion 12010 +gpub048:3933785:3933785 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933785:3933785 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933785:3933846 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933785:3933846 [2] NCCL INFO Using network IB +gpub048:3933785:3933846 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub048:3933785:3933846 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all rings +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via 
P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all trees +gpub048:3933785:3933846 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933785:3933846 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933785:3933846 [2] NCCL INFO comm 0xb9dce190 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub048:3933784:3933784 [1] NCCL INFO cudaDriverVersion 12010 +gpub048:3933784:3933784 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933784:3933784 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933784:3933848 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933784:3933848 [1] NCCL INFO Using network IB +gpub048:3933784:3933848 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub048:3933784:3933848 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all rings +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all trees +gpub048:3933784:3933848 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933784:3933848 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933784:3933848 [1] NCCL INFO comm 0x9d3ee1d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541986:541986 [1] NCCL INFO cudaDriverVersion 12010 +gpub024:541986:541986 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541986:541986 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541986:542065 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541986:542065 [1] NCCL INFO Using network IB +gpub024:541986:542065 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:541986:542065 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all rings +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all trees +gpub024:541986:542065 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541986:542065 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541986:542065 [1] NCCL INFO comm 0x8c61ca80 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub048:3933783:3933783 [0] NCCL INFO 
cudaDriverVersion 12010 +gpub048:3933783:3933783 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933783:3933783 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933783:3933847 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933783:3933847 [0] NCCL INFO Using network IB +gpub048:3933783:3933847 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub048:3933783:3933847 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Connected all rings +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Connected all trees +gpub048:3933783:3933847 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933783:3933847 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933783:3933847 [0] NCCL INFO comm 0x8d070d10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106666:106666 [0] NCCL INFO cudaDriverVersion 12010 +gpub022:106666:106666 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106666:106666 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106666:106746 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106666:106746 [0] NCCL INFO Using network IB +gpub022:106666:106746 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:106666:106746 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Connected all rings +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Connected all trees 
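The NCCL INFO lines here come from bringing up a single 64-rank communicator (16 nodes x 4 A40 GPUs): each rank reports its ring/tree peers, reaches GPUs on the same node via P2P/IPC, crosses nodes via NET/IB (RoCE on mlx5_0), and finishes with "Init COMPLETE". For reference, a minimal sketch of the kind of script that produces this output when launched under srun with NCCL_DEBUG=INFO; the environment-variable plumbing is an assumption (this run actually forks 4 workers per srun task via --multiprocessing_distributed), not code from this job:

    # nccl_smoke_test.py -- hypothetical illustration, not part of this experiment
    import os
    import torch
    import torch.distributed as dist

    # Assumes one process per GPU launched directly by srun.
    rank = int(os.environ["SLURM_PROCID"])
    world_size = int(os.environ["SLURM_NTASKS"])
    local_rank = int(os.environ["SLURM_LOCALID"])

    torch.cuda.set_device(local_rank)
    # file:// rendezvous mirrors the --dist_init_method used by this run.
    dist.init_process_group("nccl", init_method="file:///tmp/nccl_init",
                            rank=rank, world_size=world_size)

    x = torch.ones(1, device="cuda")
    dist.all_reduce(x)  # the first collective triggers the ring/tree setup logged above
    assert x.item() == world_size
    dist.destroy_process_group()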
+gpub022:106666:106746 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:106666:106746 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:106666:106746 [0] NCCL INFO comm 0x4ef16f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
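Every rank emits this reducer.cpp warning: find_unused_parameters=True tells DDP to traverse the autograd graph after each forward pass looking for parameters that received no gradient, which costs time even when, as here, every parameter turns out to be used. A minimal sketch of the constructor flag in question, with a toy module standing in for the actual S2T transformer (assumes an already-initialized process group, e.g. as in the sketch above):

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    model = torch.nn.Linear(8, 8).cuda()  # stand-in for the real encoder-decoder
    ddp_model = DDP(
        model,
        device_ids=[torch.cuda.current_device()],  # one GPU per process, as in this job
        find_unused_parameters=True,  # the flag the warning is about; set False when
                                      # the forward pass always uses every parameter
    )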
+[gpub001:0/64] 2023-07-14 13:38:31,549 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.254, forward_time=0.216, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.042, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.157e-05, train_time=8.698
+[gpub001:0/64] 2023-07-14 13:40:47,428 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.324e-04, forward_time=0.143, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.027, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 13:43:04,326 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.368e-04, forward_time=0.144, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.034, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 13:45:19,908 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.152e-04, forward_time=0.140, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.023, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.155e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 13:47:38,398 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.057e-04, forward_time=0.140, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.028, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.155e-05, train_time=2.770
+[gpub001:0/64] 2023-07-14 13:49:52,942 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.077e-04, forward_time=0.139, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.019, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.691
+[gpub001:0/64] 2023-07-14 13:52:16,838 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.205e-04, forward_time=0.142, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.035, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.878
+[gpub001:0/64] 2023-07-14 13:54:35,492 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.298e-04, forward_time=0.142, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.026, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.153e-05, train_time=2.773
+[gpub001:0/64] 2023-07-14 13:55:27,347 (multiple_iter_factory:32) INFO: Building 1st iter-factory...
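In these trainer:732 lines, loss is the hybrid CTC/attention interpolation of loss_ctc and loss_att; the logged values are consistent with a CTC weight of 0.3 (e.g. 0.3 * 75.424 + 0.7 * 56.205 = 61.970). A quick check; the weight is inferred from the numbers, since the training yaml itself is not reproduced in this log:

    # ctc_weight=0.3 is an inference from the logged values, not a config fact.
    ctc_weight = 0.3
    for loss_ctc, loss_att, loss in [(75.424, 56.205, 61.970),
                                     (78.109, 58.369, 64.291),
                                     (71.274, 53.833, 59.065)]:
        combined = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
        assert abs(combined - loss) < 5e-3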
+[gpub001:0/64] 2023-07-14 13:55:45,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:55:48,387 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 13:55:48,388 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-14 13:55:48,394 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:01:54,878 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=1.305, forward_time=0.165, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.037, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.153e-05, train_time=8.787
+[gpub001:0/64] 2023-07-14 14:04:11,115 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.143, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.026, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.725
+[gpub001:0/64] 2023-07-14 14:06:26,981 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.025, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 14:08:43,048 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.142, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.027, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.151e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 14:10:58,405 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.299e-04, forward_time=0.143, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.023, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.150e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 14:13:13,436 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.351e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.022, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.150e-05, train_time=2.700
+[gpub001:0/64] 2023-07-14 14:15:28,715 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=1.272e-04, forward_time=0.143, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.023, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.149e-05, train_time=2.705
+[gpub001:0/64] 2023-07-14 14:17:44,041 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.310e-04, forward_time=0.143, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, backward_time=1.024, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.149e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 14:19:28,411 (multiple_iter_factory:32) INFO: Building 2nd iter-factory...
+[gpub001:0/64] 2023-07-14 14:19:46,106 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-14 14:19:49,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:24:53,336 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=2.398, forward_time=0.158, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.039, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.148e-05, train_time=8.586
+[gpub001:0/64] 2023-07-14 14:27:11,824 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.032, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.148e-05, train_time=2.770
+[gpub001:0/64] 2023-07-14 14:29:27,834 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.026, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.181, optim0_lr0=5.147e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 14:31:43,989 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.183e-04, forward_time=0.144, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.028, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.147e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:34:00,002 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.026, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 14:35:34,874 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
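The trainer:663 warning marks a skipped update: with fp16 training at a loss scale around 3e+32, a gradient overflow makes the unscaled norm non-finite, and the step is dropped instead of applied. A hand-rolled approximation of that guard (a sketch of the behavior, not ESPnet's actual trainer code; clip=100.000 in the lines above corresponds to max_norm):

    import torch

    def clip_and_step(model, optimizer, max_norm=100.0):
        # clip_grad_norm_ returns the total gradient norm (possibly nan/inf).
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        if not torch.isfinite(grad_norm):
            # "The grad norm is nan. Skipping updating the model."
            optimizer.zero_grad()
            return None
        optimizer.step()
        optimizer.zero_grad()
        return float(grad_norm)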
+[gpub001:0/64] 2023-07-14 14:36:15,530 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.025, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 14:38:31,109 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.025, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.145e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 14:40:46,863 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.025, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 14:43:02,330 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=71.274, loss_att=49.885, acc=0.724, loss=56.302, backward_time=1.024, grad_norm=140.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 14:43:03,512 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 14:43:21,506 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 14:43:24,985 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:50:14,455 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=1.236, forward_time=0.144, loss_ctc=77.943, loss_att=56.932, acc=0.709, loss=63.235, backward_time=1.048, grad_norm=215.635, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.143e-05, train_time=8.642
+[gpub001:0/64] 2023-07-14 14:52:30,604 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=76.520, loss_att=56.178, acc=0.714, loss=62.281, backward_time=1.026, grad_norm=138.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.143e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:54:46,489 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.928, loss_att=50.393, acc=0.725, loss=56.554, backward_time=1.025, grad_norm=113.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.142e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 14:57:02,630 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=80.053, loss_att=64.710, acc=0.711, loss=69.313, backward_time=1.028, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.142e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:59:17,960 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.280e-04, forward_time=0.143, loss_ctc=67.583, loss_att=50.322, acc=0.734, loss=55.500, backward_time=1.023, grad_norm=118.809, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:01:33,622 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.303e-04, forward_time=0.146, loss_ctc=64.848, loss_att=44.323, acc=0.737, loss=50.480, backward_time=1.024, grad_norm=131.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:03:48,945 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.493, loss_att=50.724, acc=0.731, loss=56.054, backward_time=1.022, grad_norm=139.131, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:04,272 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.350e-04, forward_time=0.144, loss_ctc=62.583, loss_att=44.592, acc=0.725, loss=49.990, backward_time=1.023, grad_norm=116.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:50,474 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 15:07:08,807 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:07:12,196 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:07:12,197 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 15:07:12,203 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:12:51,478 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.286, forward_time=0.144, loss_ctc=82.374, loss_att=58.429, acc=0.718, loss=65.612, backward_time=1.042, grad_norm=161.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.139e-05, train_time=8.144
+[gpub001:0/64] 2023-07-14 15:15:09,454 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.225e-04, forward_time=0.143, loss_ctc=72.824, loss_att=56.074, acc=0.705, loss=61.099, backward_time=1.027, grad_norm=124.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.759
+[gpub001:0/64] 2023-07-14 15:16:18,542 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 15:17:26,359 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=70.789, loss_att=51.064, acc=0.720, loss=56.982, backward_time=1.025, grad_norm=127.943, clip=100.000, loss_scale=2.417e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 15:19:44,415 (trainer:732) INFO: 49epoch:train:3601-3700batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=81.997, loss_att=65.961, acc=0.690, loss=70.771, backward_time=1.027, grad_norm=125.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 15:22:04,902 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.446, loss_att=49.301, acc=0.730, loss=55.044, backward_time=1.028, grad_norm=115.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.810
+[gpub001:0/64] 2023-07-14 15:24:23,287 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=70.058, loss_att=52.734, acc=0.711, loss=57.931, backward_time=1.024, grad_norm=135.739, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.767
+[gpub001:0/64] 2023-07-14 15:26:40,552 (trainer:732) INFO: 49epoch:train:3901-4000batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=63.341, loss_att=45.010, acc=0.729, loss=50.510, backward_time=1.027, grad_norm=115.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.745
+[gpub001:0/64] 2023-07-14 15:28:56,692 (trainer:732) INFO: 49epoch:train:4001-4100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=67.558, loss_att=49.060, acc=0.721, loss=54.609, backward_time=1.024, grad_norm=113.122, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 15:30:27,503 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 15:30:45,587 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-14 15:30:49,047 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:35:52,270 (trainer:732) INFO: 49epoch:train:4101-4200batch: iter_time=1.261, forward_time=0.144, loss_ctc=70.221, loss_att=53.085, acc=0.717, loss=58.226, backward_time=1.035, grad_norm=140.521, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=8.311
+[gpub001:0/64] 2023-07-14 15:38:08,843 (trainer:732) INFO: 49epoch:train:4201-4300batch: iter_time=1.161e-04, forward_time=0.144, loss_ctc=73.382, loss_att=57.566, acc=0.707, loss=62.311, backward_time=1.029, grad_norm=116.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.134e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 15:40:24,503 (trainer:732) INFO: 49epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=72.437, loss_att=50.993, acc=0.718, loss=57.426, backward_time=1.025, grad_norm=135.729, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.134e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:42:39,948 (trainer:732) INFO: 49epoch:train:4401-4500batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=79.101, loss_att=66.083, acc=0.685, loss=69.989, backward_time=1.023, grad_norm=131.828, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 15:44:55,456 (trainer:732) INFO: 49epoch:train:4501-4600batch: iter_time=1.249e-04, forward_time=0.143, loss_ctc=67.710, loss_att=48.912, acc=0.733, loss=54.551, backward_time=1.023, grad_norm=151.097, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 15:47:10,473 (trainer:732) INFO: 49epoch:train:4601-4700batch: iter_time=1.201e-04, forward_time=0.142, loss_ctc=67.960, loss_att=47.417, acc=0.714, loss=53.580, backward_time=1.021, grad_norm=139.328, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.132e-05, train_time=2.700
+[gpub001:0/64] 2023-07-14 15:49:28,348 (trainer:732) INFO: 49epoch:train:4701-4800batch: iter_time=1.428e-04, forward_time=0.143, loss_ctc=66.335, loss_att=46.579, acc=0.730, loss=52.506, backward_time=1.025, grad_norm=121.960, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.757
+[gpub001:0/64] 2023-07-14 15:51:43,745 (trainer:732) INFO: 49epoch:train:4801-4900batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=65.075, loss_att=47.591, acc=0.717, loss=52.836, backward_time=1.024, grad_norm=142.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 15:53:59,339 (trainer:732) INFO: 49epoch:train:4901-5000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=76.840, loss_att=55.520, acc=0.720, loss=61.916, backward_time=1.025, grad_norm=146.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 15:54:01,002 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 15:54:18,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-14 15:54:22,677 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:02:02,572 (trainer:732) INFO: 49epoch:train:5001-5100batch: iter_time=1.246, forward_time=0.173, loss_ctc=70.620, loss_att=55.070, acc=0.710, loss=59.735, backward_time=1.102, grad_norm=143.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.130e-05, train_time=9.664
+[gpub001:0/64] 2023-07-14 16:05:05,429 (trainer:732) INFO: 49epoch:train:5101-5200batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=74.968, loss_att=53.708, acc=0.725, loss=60.086, backward_time=1.111, grad_norm=140.062, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.657
+[gpub001:0/64] 2023-07-14 16:07:39,205 (trainer:732) INFO: 49epoch:train:5201-5300batch: iter_time=1.230e-04, forward_time=0.143, loss_ctc=78.360, loss_att=62.863, acc=0.701, loss=67.512, backward_time=1.045, grad_norm=129.656, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.075
+[gpub001:0/64] 2023-07-14 16:10:30,819 (trainer:732) INFO: 49epoch:train:5301-5400batch: iter_time=1.198e-04, forward_time=0.143, loss_ctc=70.380, loss_att=52.634, acc=0.739, loss=57.958, backward_time=1.065, grad_norm=145.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.128e-05, train_time=3.432
+[gpub001:0/64] 2023-07-14 16:13:06,699 (trainer:732) INFO: 49epoch:train:5401-5500batch: iter_time=1.248e-04, forward_time=0.144, loss_ctc=66.159, loss_att=49.037, acc=0.727, loss=54.174, backward_time=1.050, grad_norm=127.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.128e-05, train_time=3.117
+[gpub001:0/64] 2023-07-14 16:15:41,679 (trainer:732) INFO: 49epoch:train:5501-5600batch: iter_time=1.175e-04, forward_time=0.143, loss_ctc=63.480, loss_att=42.575, acc=0.741, loss=48.847, backward_time=1.045, grad_norm=114.198, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=3.099
+[gpub001:0/64] 2023-07-14 16:18:02,778 (trainer:732) INFO: 49epoch:train:5601-5700batch: iter_time=1.207e-04, forward_time=0.143, loss_ctc=71.660, loss_att=53.814, acc=0.721, loss=59.168, backward_time=1.031, grad_norm=134.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=2.822
+[gpub001:0/64] 2023-07-14 16:20:31,593 (trainer:732) INFO: 49epoch:train:5701-5800batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.301, loss_att=52.766, acc=0.725, loss=58.326, backward_time=1.041, grad_norm=137.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=2.976
+[gpub001:0/64] 2023-07-14 16:21:30,492 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 16:21:48,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 16:21:52,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:27:57,645 (trainer:732) INFO: 49epoch:train:5801-5900batch: iter_time=1.522, forward_time=0.161, loss_ctc=73.115, loss_att=49.116, acc=0.724, loss=56.316, backward_time=1.040, grad_norm=136.046, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=8.921
+[gpub001:0/64] 2023-07-14 16:30:13,954 (trainer:732) INFO: 49epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=71.267, loss_att=55.700, acc=0.705, loss=60.370, backward_time=1.026, grad_norm=123.436, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.125e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 16:32:30,026 (trainer:732) INFO: 49epoch:train:6001-6100batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=70.944, loss_att=50.849, acc=0.722, loss=56.877, backward_time=1.022, grad_norm=205.768, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 16:34:45,468 (trainer:732) INFO: 49epoch:train:6101-6200batch: iter_time=1.363e-04, forward_time=0.144, loss_ctc=80.848, loss_att=65.774, acc=0.690, loss=70.296, backward_time=1.024, grad_norm=189.035, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 16:37:00,897 (trainer:732) INFO: 49epoch:train:6201-6300batch: iter_time=1.565e-04, forward_time=0.144, loss_ctc=68.282, loss_att=48.857, acc=0.734, loss=54.685, backward_time=1.025, grad_norm=112.677, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 16:39:16,422 (trainer:732) INFO: 49epoch:train:6301-6400batch: iter_time=1.438e-04, forward_time=0.144, loss_ctc=69.857, loss_att=51.306, acc=0.719, loss=56.871, backward_time=1.024, grad_norm=136.182, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 16:41:31,597 (trainer:732) INFO: 49epoch:train:6401-6500batch: iter_time=1.378e-04, forward_time=0.144, loss_ctc=62.547, loss_att=44.512, acc=0.727, loss=49.923, backward_time=1.022, grad_norm=115.827, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.122e-05, train_time=2.703
+[gpub001:0/64] 2023-07-14 16:43:46,948 (trainer:732) INFO: 49epoch:train:6501-6600batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=68.154, loss_att=48.823, acc=0.723, loss=54.622, backward_time=1.024, grad_norm=127.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.122e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 16:45:29,079 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-14 16:45:47,237 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-14 16:45:50,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:49:39,620 (trainer:732) INFO: 49epoch:train:6601-6700batch: iter_time=2.079, forward_time=0.183, loss_ctc=76.672, loss_att=56.369, acc=0.715, loss=62.460, backward_time=1.034, grad_norm=132.969, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.121e-05, train_time=7.053
+[gpub001:0/64] 2023-07-14 16:51:56,314 (trainer:732) INFO: 49epoch:train:6701-6800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=70.489, loss_att=56.214, acc=0.716, loss=60.496, backward_time=1.027, grad_norm=152.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.121e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 16:54:12,308 (trainer:732) INFO: 49epoch:train:6801-6900batch: iter_time=1.133e-04, forward_time=0.143, loss_ctc=71.773, loss_att=49.137, acc=0.730, loss=55.928, backward_time=1.027, grad_norm=116.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 16:56:28,064 (trainer:732) INFO: 49epoch:train:6901-7000batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=76.239, loss_att=59.077, acc=0.716, loss=64.225, backward_time=1.026, grad_norm=122.696, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 16:58:43,796 (trainer:732) INFO: 49epoch:train:7001-7100batch: iter_time=1.142e-04, forward_time=0.144, loss_ctc=73.758, loss_att=58.472, acc=0.731, loss=63.058, backward_time=1.026, grad_norm=119.532, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:00:59,084 (trainer:732) INFO: 49epoch:train:7101-7200batch: iter_time=1.003e-04, forward_time=0.142, loss_ctc=67.069, loss_att=48.465, acc=0.731, loss=54.046, backward_time=1.023, grad_norm=124.919, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:03:14,636 (trainer:732) INFO: 49epoch:train:7201-7300batch: iter_time=9.950e-05, forward_time=0.144, loss_ctc=63.394, loss_att=43.420, acc=0.735, loss=49.412, backward_time=1.024, grad_norm=118.629, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.118e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 17:05:30,294 (trainer:732) INFO: 49epoch:train:7301-7400batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=70.573, loss_att=52.037, acc=0.731, loss=57.598, backward_time=1.025, grad_norm=138.691, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 17:07:45,765 (trainer:732) INFO: 49epoch:train:7401-7500batch: iter_time=9.254e-05, forward_time=0.144, loss_ctc=67.756, loss_att=48.815, acc=0.728, loss=54.498, backward_time=1.025, grad_norm=152.362, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:07:47,423 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-14 17:08:05,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 17:08:09,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:15:00,221 (trainer:732) INFO: 49epoch:train:7501-7600batch: iter_time=1.254, forward_time=0.144, loss_ctc=73.840, loss_att=54.828, acc=0.713, loss=60.532, backward_time=1.035, grad_norm=126.378, clip=100.000, loss_scale=2.434e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=8.689
+[gpub001:0/64] 2023-07-14 17:17:16,603 (trainer:732) INFO: 49epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=73.851, loss_att=54.824, acc=0.712, loss=60.532, backward_time=1.027, grad_norm=149.921, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=2.727
+[gpub001:0/64] 2023-07-14 17:19:32,240 (trainer:732) INFO: 49epoch:train:7701-7800batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=69.949, loss_att=52.432, acc=0.715, loss=57.687, backward_time=1.024, grad_norm=117.932, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 17:21:47,986 (trainer:732) INFO: 49epoch:train:7801-7900batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=80.270, loss_att=64.561, acc=0.698, loss=69.274, backward_time=1.027, grad_norm=140.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 17:24:03,451 (trainer:732) INFO: 49epoch:train:7901-8000batch: iter_time=1.315e-04, forward_time=0.145, loss_ctc=65.982, loss_att=48.615, acc=0.731, loss=53.825, backward_time=1.024, grad_norm=143.731, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:26:18,815 (trainer:732) INFO: 49epoch:train:8001-8100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=63.870, loss_att=44.066, acc=0.731, loss=50.007, backward_time=1.023, grad_norm=124.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 17:28:34,027 (trainer:732) INFO: 49epoch:train:8101-8200batch: iter_time=1.181e-04, forward_time=0.144, loss_ctc=68.490, loss_att=50.287, acc=0.723, loss=55.748, backward_time=1.021, grad_norm=116.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.704
+[gpub001:0/64] 2023-07-14 17:30:49,344 (trainer:732) INFO: 49epoch:train:8201-8300batch: iter_time=1.236e-04, forward_time=0.145, loss_ctc=60.268, loss_att=42.861, acc=0.725, loss=48.083, backward_time=1.021, grad_norm=105.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:31:35,992 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-14 17:31:53,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 17:31:57,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:38:13,424 (trainer:732) INFO: 49epoch:train:8301-8400batch: iter_time=1.214, forward_time=0.154, loss_ctc=81.413, loss_att=59.604, acc=0.719, loss=66.147, backward_time=1.043, grad_norm=156.011, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=8.881
+[gpub001:0/64] 2023-07-14 17:40:30,027 (trainer:732) INFO: 49epoch:train:8401-8500batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=72.136, loss_att=55.373, acc=0.718, loss=60.402, backward_time=1.026, grad_norm=133.755, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:42:45,754 (trainer:732) INFO: 49epoch:train:8501-8600batch: iter_time=1.202e-04, forward_time=0.145, loss_ctc=70.343, loss_att=50.701, acc=0.727, loss=56.593, backward_time=1.028, grad_norm=121.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:45:01,816 (trainer:732) INFO: 49epoch:train:8601-8700batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=80.448, loss_att=64.353, acc=0.708, loss=69.182, backward_time=1.028, grad_norm=135.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 17:47:20,645 (trainer:732) INFO: 49epoch:train:8701-8800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.178, loss_att=48.931, acc=0.743, loss=54.405, backward_time=1.028, grad_norm=142.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.110e-05, train_time=2.776
+[gpub001:0/64] 2023-07-14 17:49:37,238 (trainer:732) INFO: 49epoch:train:8801-8900batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=68.598, loss_att=50.099, acc=0.731, loss=55.649, backward_time=1.027, grad_norm=124.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:51:56,378 (trainer:732) INFO: 49epoch:train:8901-9000batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=62.040, loss_att=43.423, acc=0.740, loss=49.008, backward_time=1.025, grad_norm=114.553, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.783
+[gpub001:0/64] 2023-07-14 17:54:11,781 (trainer:732) INFO: 49epoch:train:9001-9100batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=67.395, loss_att=49.894, acc=0.729, loss=55.144, backward_time=1.022, grad_norm=128.721, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.108e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 17:56:00,273 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-14 17:56:18,139 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 17:56:21,615 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:01:26,368 (trainer:732) INFO: 49epoch:train:9101-9200batch: iter_time=1.617, forward_time=0.164, loss_ctc=69.323, loss_att=50.650, acc=0.726, loss=56.252, backward_time=1.035, grad_norm=140.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.108e-05, train_time=8.692
+[gpub001:0/64] 2023-07-14 18:03:44,759 (trainer:732) INFO: 49epoch:train:9201-9300batch: iter_time=1.228e-04, forward_time=0.147, loss_ctc=73.686, loss_att=57.660, acc=0.717, loss=62.468, backward_time=1.032, grad_norm=120.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.768
+[gpub001:0/64] 2023-07-14 18:06:01,753 (trainer:732) INFO: 49epoch:train:9301-9400batch: iter_time=1.486e-04, forward_time=0.145, loss_ctc=71.589, loss_att=48.903, acc=0.728, loss=55.709, backward_time=1.025, grad_norm=119.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 18:08:19,922 (trainer:732) INFO: 49epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=80.193, loss_att=65.513, acc=0.702, loss=69.917, backward_time=1.031, grad_norm=140.511, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.763
+[gpub001:0/64] 2023-07-14 18:10:37,699 (trainer:732) INFO: 49epoch:train:9501-9600batch: iter_time=1.212e-04, forward_time=0.144, loss_ctc=67.328, loss_att=48.700, acc=0.744, loss=54.289, backward_time=1.027, grad_norm=128.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.755
+[gpub001:0/64] 2023-07-14 18:12:55,391 (trainer:732) INFO: 49epoch:train:9601-9700batch: iter_time=1.316e-04, forward_time=0.143, loss_ctc=65.278, loss_att=46.806, acc=0.726, loss=52.347, backward_time=1.026, grad_norm=135.251, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.105e-05, train_time=2.754
+[gpub001:0/64] 2023-07-14 18:15:15,142 (trainer:732) INFO: 49epoch:train:9701-9800batch: iter_time=1.367e-04, forward_time=0.145, loss_ctc=65.510, loss_att=45.641, acc=0.741, loss=51.602, backward_time=1.034, grad_norm=115.004, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.105e-05, train_time=2.795
+[gpub001:0/64] 2023-07-14 18:17:33,807 (trainer:732) INFO: 49epoch:train:9801-9900batch: iter_time=1.295e-04, forward_time=0.144, loss_ctc=64.369, loss_att=47.790, acc=0.723, loss=52.764, backward_time=1.027, grad_norm=108.677, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.773
+[gpub001:0/64] 2023-07-14 18:19:49,329 (trainer:732) INFO: 49epoch:train:9901-10000batch: iter_time=1.004e-04, forward_time=0.144, loss_ctc=76.087, loss_att=54.280, acc=0.724, loss=60.822, backward_time=1.024, grad_norm=120.371, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 18:33:02,493 (trainer:338) INFO: 49epoch results: [train] iter_time=0.177, forward_time=0.146, loss_ctc=71.210, loss_att=52.672, acc=0.719, loss=58.234, backward_time=1.030, grad_norm=131.679, clip=100.000, loss_scale=2.702e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=3.462, time=4 hours, 48 minutes and 48.83 seconds, total_count=460000, gpu_max_cached_mem_GB=34.336, [valid] loss_ctc=43.418, cer_ctc=0.254, loss_att=37.707, acc=0.674, cer=0.423, wer=0.998, loss=39.421, time=7 minutes and 3.51 seconds, total_count=47058, gpu_max_cached_mem_GB=37.631, [att_plot] time=5 minutes and 53.56 seconds, total_count=0, gpu_max_cached_mem_GB=37.631
+[gpub001:0/64] 2023-07-14 18:33:18,474 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-14 18:33:18,484 (trainer:272) INFO: 50/60epoch started. Estimated time to finish: 2 days, 7 hours and 22 minutes
+[gpub001:0/64] 2023-07-14 18:33:18,487 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 18:33:35,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-14 18:33:39,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:37:48,677 (trainer:732) INFO: 50epoch:train:1-100batch: iter_time=1.165, forward_time=0.188, loss_ctc=76.860, loss_att=56.022, acc=0.704, loss=62.273, backward_time=1.063, grad_norm=182.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.103e-05, train_time=5.403
+[gpub001:0/64] 2023-07-14 18:40:12,390 (trainer:732) INFO: 50epoch:train:101-200batch: iter_time=9.433e-05, forward_time=0.179, loss_ctc=63.633, loss_att=45.042, acc=0.731, loss=50.619, backward_time=1.036, grad_norm=138.710, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.103e-05, train_time=2.872
+[gpub001:0/64] 2023-07-14 18:42:28,865 (trainer:732) INFO: 50epoch:train:201-300batch: iter_time=9.529e-05, forward_time=0.145, loss_ctc=66.674, loss_att=52.287, acc=0.720, loss=56.603, backward_time=1.031, grad_norm=131.160, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.102e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 18:44:55,125 (trainer:732) INFO: 50epoch:train:301-400batch: iter_time=9.373e-05, forward_time=0.144, loss_ctc=70.708, loss_att=47.914, acc=0.725, loss=54.752, backward_time=1.038, grad_norm=128.284, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.925
+[gpub001:0/64] 2023-07-14 18:47:16,114 (trainer:732) INFO: 50epoch:train:401-500batch: iter_time=9.543e-05, forward_time=0.145, loss_ctc=76.756, loss_att=56.886, acc=0.714, loss=62.847, backward_time=1.034, grad_norm=131.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.820
+[gpub001:0/64] 2023-07-14 18:49:35,575 (trainer:732) INFO: 50epoch:train:501-600batch: iter_time=9.838e-05, forward_time=0.144, loss_ctc=66.678, loss_att=50.737, acc=0.720, loss=55.519, backward_time=1.034, grad_norm=115.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.789
+[gpub001:0/64] 2023-07-14 18:52:04,139 (trainer:732) INFO: 50epoch:train:601-700batch: iter_time=9.636e-05, forward_time=0.157, loss_ctc=66.833, loss_att=48.220, acc=0.716, loss=53.804, backward_time=1.050, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.971
+[gpub001:0/64] 2023-07-14 18:54:29,918 (trainer:732) INFO: 50epoch:train:701-800batch: iter_time=9.484e-05, forward_time=0.144, loss_ctc=71.313, loss_att=46.886, acc=0.720, loss=54.214, backward_time=1.043, grad_norm=121.906, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.099e-05, train_time=2.915
+[gpub001:0/64] 2023-07-14 18:55:23,066 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-14 18:55:40,513 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 18:55:43,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:00:29,306 (trainer:732) INFO: 50epoch:train:801-900batch: iter_time=1.702, forward_time=0.164, loss_ctc=74.067, loss_att=54.822, acc=0.707, loss=60.595, backward_time=1.045, grad_norm=162.733, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.099e-05, train_time=7.188
+[gpub001:0/64] 2023-07-14 19:02:46,018 (trainer:732) INFO: 50epoch:train:901-1000batch: iter_time=1.192e-04, forward_time=0.144, loss_ctc=63.885, loss_att=44.598, acc=0.731, loss=50.384, backward_time=1.029, grad_norm=110.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 19:05:02,005 (trainer:732) INFO: 50epoch:train:1001-1100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=65.165, loss_att=50.965, acc=0.731, loss=55.225, backward_time=1.029, grad_norm=115.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 19:07:17,905 (trainer:732) INFO: 50epoch:train:1101-1200batch: iter_time=1.223e-04, forward_time=0.144, loss_ctc=72.477, loss_att=48.920, acc=0.727, loss=55.987, backward_time=1.029, grad_norm=135.909, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:09:33,663 (trainer:732) INFO: 50epoch:train:1201-1300batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=76.221, loss_att=57.505, acc=0.709, loss=63.120, backward_time=1.029, grad_norm=152.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:11:52,890 (trainer:732) INFO: 50epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=64.913, loss_att=49.074, acc=0.717, loss=53.826, backward_time=1.032, grad_norm=143.678, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.784
+[gpub001:0/64] 2023-07-14 19:14:08,777 (trainer:732) INFO: 50epoch:train:1401-1500batch: iter_time=1.097e-04, forward_time=0.145, loss_ctc=64.207, loss_att=46.300, acc=0.727, loss=51.672, backward_time=1.028, grad_norm=96.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:16:24,490 (trainer:732) INFO: 50epoch:train:1501-1600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=72.579, loss_att=48.346, acc=0.712, loss=55.616, backward_time=1.026, grad_norm=117.818, clip=100.000, loss_scale=4.868e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 19:18:06,491 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-14 19:18:24,593 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 19:18:28,029 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:21:27,486 (trainer:732) INFO: 50epoch:train:1601-1700batch: iter_time=1.532, forward_time=0.145, loss_ctc=76.069, loss_att=57.854, acc=0.715, loss=63.318, backward_time=1.042, grad_norm=120.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=6.060
+[gpub001:0/64] 2023-07-14 19:23:44,660 (trainer:732) INFO: 50epoch:train:1701-1800batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=69.245, loss_att=50.311, acc=0.700, loss=55.991, backward_time=1.031, grad_norm=146.320, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.743
+[gpub001:0/64] 2023-07-14 19:26:00,439 (trainer:732) INFO: 50epoch:train:1801-1900batch: iter_time=1.160e-04, forward_time=0.144, loss_ctc=65.293, loss_att=47.043, acc=0.727, loss=52.518, backward_time=1.025, grad_norm=118.987, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:28:15,850 (trainer:732) INFO: 50epoch:train:1901-2000batch: iter_time=1.534e-04, forward_time=0.146, loss_ctc=64.296, loss_att=50.158, acc=0.720, loss=54.399, backward_time=1.025, grad_norm=125.395, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.093e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:30:31,788 (trainer:732) INFO: 50epoch:train:2001-2100batch: iter_time=1.528e-04, forward_time=0.147, loss_ctc=72.538, loss_att=52.328, acc=0.704, loss=58.391, backward_time=1.030, grad_norm=133.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:32:47,884 (trainer:732) INFO: 50epoch:train:2101-2200batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=73.571, loss_att=55.147, acc=0.713, loss=60.674, backward_time=1.031, grad_norm=116.254, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.722
+[gpub001:0/64] 2023-07-14 19:35:03,746 (trainer:732) INFO: 50epoch:train:2201-2300batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=66.832, loss_att=48.835, acc=0.707, loss=54.234, backward_time=1.029, grad_norm=126.747, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 19:37:19,158 (trainer:732) INFO: 50epoch:train:2301-2400batch: iter_time=1.330e-04, forward_time=0.146, loss_ctc=69.922, loss_att=45.724, acc=0.719, loss=52.983, backward_time=1.027, grad_norm=121.655, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:39:35,125 (trainer:732) INFO: 50epoch:train:2401-2500batch: iter_time=1.500e-04, forward_time=0.147, loss_ctc=69.057, loss_att=51.498, acc=0.713, loss=56.766, backward_time=1.030, grad_norm=136.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:39:36,493 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 19:39:54,855 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 19:39:58,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:47:03,128 (trainer:732) INFO: 50epoch:train:2501-2600batch: iter_time=1.229, forward_time=0.146, loss_ctc=75.319, loss_att=54.368, acc=0.702, loss=60.653, backward_time=1.044, grad_norm=145.226, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=8.960
+[gpub001:0/64] 2023-07-14 19:49:19,629 (trainer:732) INFO: 50epoch:train:2601-2700batch: iter_time=1.033e-04, forward_time=0.147, loss_ctc=62.652, loss_att=44.316, acc=0.729, loss=49.817, backward_time=1.029, grad_norm=163.995, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 19:51:35,460 (trainer:732) INFO: 50epoch:train:2701-2800batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=65.546, loss_att=51.752, acc=0.720, loss=55.890, backward_time=1.028, grad_norm=116.187, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:53:51,137 (trainer:732) INFO: 50epoch:train:2801-2900batch: iter_time=1.003e-04, forward_time=0.145, loss_ctc=69.677, loss_att=46.545, acc=0.724, loss=53.485, backward_time=1.027, grad_norm=151.273, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 19:55:55,895 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 19:56:06,946 (trainer:732) INFO: 50epoch:train:2901-3000batch: iter_time=1.015e-04, forward_time=0.145, loss_ctc=75.469, loss_att=56.637, acc=0.712, loss=62.287, backward_time=1.028, grad_norm=123.542, clip=100.000, loss_scale=6.225e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:58:23,795 (trainer:732) INFO: 50epoch:train:3001-3100batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=64.660, loss_att=50.088, acc=0.711, loss=54.459, backward_time=1.029, grad_norm=139.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:00:39,421 (trainer:732) INFO: 50epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.144, loss_ctc=65.956, loss_att=46.842, acc=0.716, loss=52.576, backward_time=1.026, grad_norm=112.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 20:02:55,138 (trainer:732) INFO: 50epoch:train:3201-3300batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=69.096, loss_att=46.225, acc=0.715, loss=53.086, backward_time=1.028, grad_norm=123.518, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.086e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 20:03:41,171 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 20:03:59,687 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 20:04:03,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:10:00,535 (trainer:732) INFO: 50epoch:train:3301-3400batch: iter_time=1.229, forward_time=0.207, loss_ctc=76.427, loss_att=55.751, acc=0.711, loss=61.954, backward_time=1.042, grad_norm=113.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.086e-05, train_time=8.507
+[gpub001:0/64] 2023-07-14 20:12:16,912 (trainer:732) INFO: 50epoch:train:3401-3500batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=67.057, loss_att=48.763, acc=0.708, loss=54.251, backward_time=1.030, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.728
+[gpub001:0/64] 2023-07-14 20:14:32,971 (trainer:732) INFO: 50epoch:train:3501-3600batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=67.118, loss_att=49.572, acc=0.728, loss=54.835, backward_time=1.028, grad_norm=137.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 20:16:49,816 (trainer:732) INFO: 50epoch:train:3601-3700batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=65.072, loss_att=47.251, acc=0.722, loss=52.598, backward_time=1.026, grad_norm=124.680, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.084e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:19:06,520 (trainer:732) INFO: 50epoch:train:3701-3800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=68.645, loss_att=49.960, acc=0.716, loss=55.566, backward_time=1.026, grad_norm=119.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.084e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 20:21:22,393 (trainer:732) INFO: 50epoch:train:3801-3900batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=71.225, loss_att=52.976, acc=0.716, loss=58.451, backward_time=1.027, grad_norm=125.839, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.083e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:23:38,267 (trainer:732) INFO: 50epoch:train:3901-4000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=67.296, loss_att=49.609, acc=0.708, loss=54.915, backward_time=1.027, grad_norm=122.521, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:25:53,904 (trainer:732) INFO: 50epoch:train:4001-4100batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=68.722, loss_att=44.612, acc=0.725, loss=51.845, backward_time=1.026, grad_norm=133.861, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 20:27:25,140 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 20:27:42,919 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 20:27:46,376 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:31:59,928 (trainer:732) INFO: 50epoch:train:4101-4200batch: iter_time=1.266, forward_time=0.158, loss_ctc=69.892, loss_att=54.719, acc=0.724, loss=59.271, backward_time=1.041, grad_norm=120.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=7.318
+[gpub001:0/64] 2023-07-14 20:34:16,845 (trainer:732) INFO: 50epoch:train:4201-4300batch: iter_time=1.347e-04, forward_time=0.148, loss_ctc=72.245, loss_att=50.876, acc=0.714, loss=57.287, backward_time=1.033, grad_norm=144.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 20:36:33,354 (trainer:732) INFO: 50epoch:train:4301-4400batch: iter_time=1.372e-04, forward_time=0.145, loss_ctc=64.193, loss_att=45.992, acc=0.740, loss=51.452, backward_time=1.028, grad_norm=136.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 20:38:49,363 (trainer:732) INFO: 50epoch:train:4401-4500batch: iter_time=1.302e-04, forward_time=0.145, loss_ctc=64.055, loss_att=49.713, acc=0.733, loss=54.015, backward_time=1.029, grad_norm=152.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:41:05,362 (trainer:732) INFO: 50epoch:train:4501-4600batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=71.594, loss_att=49.931, acc=0.723, loss=56.430, backward_time=1.030, grad_norm=141.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:43:21,381 (trainer:732) INFO: 50epoch:train:4601-4700batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.948, loss_att=55.695, acc=0.725, loss=60.871, backward_time=1.029, grad_norm=108.629, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:45:37,140 (trainer:732) INFO: 50epoch:train:4701-4800batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=65.934, loss_att=47.948, acc=0.724, loss=53.344, backward_time=1.028, grad_norm=152.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 20:47:52,965 (trainer:732) INFO: 50epoch:train:4801-4900batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.950, loss_att=45.382, acc=0.730, loss=52.453, backward_time=1.029, grad_norm=130.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 20:50:08,537 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 20:50:26,538 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:50:30,045 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:50:30,046 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 20:50:30,052 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:54:56,835 (trainer:732) INFO: 50epoch:train:4901-5000batch: iter_time=1.261, forward_time=0.166, loss_ctc=68.113, loss_att=51.907, acc=0.712, loss=56.769, backward_time=1.030, grad_norm=123.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=8.477
+[gpub001:0/64] 2023-07-14 20:57:14,888 (trainer:732) INFO: 50epoch:train:5001-5100batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=74.370, loss_att=53.834, acc=0.714, loss=59.995, backward_time=1.037, grad_norm=133.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 20:59:30,447 (trainer:732) INFO: 50epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=63.418, loss_att=44.065, acc=0.743, loss=49.871, backward_time=1.025, grad_norm=118.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 21:01:46,392 (trainer:732) INFO: 50epoch:train:5201-5300batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.343, loss_att=50.622, acc=0.729, loss=54.738, backward_time=1.029, grad_norm=107.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 21:04:02,266 (trainer:732) INFO: 50epoch:train:5301-5400batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=67.130, loss_att=45.243, acc=0.738, loss=51.809, backward_time=1.027, grad_norm=126.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 21:06:18,342 (trainer:732) INFO: 50epoch:train:5401-5500batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=74.073, loss_att=55.688, acc=0.724, loss=61.203, backward_time=1.029, grad_norm=120.175, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 21:08:34,524 (trainer:732) INFO: 50epoch:train:5501-5600batch: iter_time=1.200e-04, forward_time=0.148, loss_ctc=64.861, loss_att=50.199, acc=0.726, loss=54.598, backward_time=1.029, grad_norm=114.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 21:10:57,350 (trainer:732) INFO: 50epoch:train:5601-5700batch: iter_time=1.141e-04, forward_time=0.146, loss_ctc=63.074, loss_att=45.439, acc=0.726, loss=50.730, backward_time=1.034, grad_norm=110.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.856
+[gpub001:0/64] 2023-07-14 21:13:13,082 (trainer:732) INFO: 50epoch:train:5701-5800batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=69.031, loss_att=46.799, acc=0.723, loss=53.469, backward_time=1.027, grad_norm=143.702, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.073e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:14:11,324 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 21:14:29,273 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub001:0/64] 2023-07-14 21:14:32,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 21:19:03,310 (trainer:732) INFO: 50epoch:train:5801-5900batch: iter_time=2.002, forward_time=0.183, loss_ctc=73.655, loss_att=53.932, acc=0.722, loss=59.849, backward_time=1.055, grad_norm=115.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.073e-05, train_time=7.004 +[gpub001:0/64] 2023-07-14 21:21:32,431 (trainer:732) INFO: 50epoch:train:5901-6000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=66.964, loss_att=50.032, acc=0.725, loss=55.112, backward_time=1.037, grad_norm=127.025, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.072e-05, train_time=2.983 +[gpub001:0/64] 2023-07-14 21:24:07,379 (trainer:732) INFO: 50epoch:train:6001-6100batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=66.762, loss_att=49.729, acc=0.735, loss=54.839, backward_time=1.080, grad_norm=115.938, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=3.099 +[gpub001:0/64] 2023-07-14 21:26:28,272 (trainer:732) INFO: 50epoch:train:6101-6200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=65.838, loss_att=47.261, acc=0.729, loss=52.834, backward_time=1.034, grad_norm=114.927, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=2.818 +[gpub001:0/64] 2023-07-14 21:28:48,811 (trainer:732) INFO: 50epoch:train:6201-6300batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=68.647, loss_att=49.152, acc=0.730, loss=55.000, backward_time=1.037, grad_norm=128.845, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.811 +[gpub001:0/64] 2023-07-14 21:30:05,936 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-14 21:31:12,672 (trainer:732) INFO: 50epoch:train:6301-6400batch: iter_time=1.053e-04, forward_time=0.147, loss_ctc=69.972, loss_att=51.775, acc=0.730, loss=57.234, backward_time=1.035, grad_norm=136.068, clip=100.000, loss_scale=2.484e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.877 +[gpub001:0/64] 2023-07-14 21:33:31,183 (trainer:732) INFO: 50epoch:train:6401-6500batch: iter_time=1.154e-04, forward_time=0.146, loss_ctc=66.029, loss_att=48.270, acc=0.719, loss=53.598, backward_time=1.031, grad_norm=131.463, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.770 +[gpub001:0/64] 2023-07-14 21:35:47,199 (trainer:732) INFO: 50epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=68.099, loss_att=44.525, acc=0.732, loss=51.598, backward_time=1.027, grad_norm=116.086, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.720 +[gpub001:0/64] 2023-07-14 21:37:20,083 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub001:0/64] 2023-07-14 21:37:38,383 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-14 21:37:41,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 21:41:34,057 (trainer:732) INFO: 50epoch:train:6601-6700batch: iter_time=1.243, forward_time=0.149, loss_ctc=75.228, loss_att=56.364, acc=0.705, loss=62.024, backward_time=1.043, grad_norm=131.714, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=6.937 +[gpub001:0/64] 2023-07-14 21:43:51,597 (trainer:732) INFO: 50epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=62.359, loss_att=46.440, acc=0.717, loss=51.216, backward_time=1.031, grad_norm=126.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=2.751 +[gpub001:0/64] 2023-07-14 21:46:07,899 (trainer:732) INFO: 50epoch:train:6801-6900batch: iter_time=1.299e-04, forward_time=0.146, loss_ctc=65.735, loss_att=48.063, acc=0.725, loss=53.365, backward_time=1.029, grad_norm=134.386, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 21:48:23,594 (trainer:732) INFO: 50epoch:train:6901-7000batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=68.323, loss_att=49.027, acc=0.722, loss=54.815, backward_time=1.026, grad_norm=130.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.714 +[gpub001:0/64] 2023-07-14 21:50:39,377 (trainer:732) INFO: 50epoch:train:7001-7100batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=70.496, loss_att=52.439, acc=0.716, loss=57.856, 
backward_time=1.029, grad_norm=129.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.715 +[gpub001:0/64] 2023-07-14 21:52:55,578 (trainer:732) INFO: 50epoch:train:7101-7200batch: iter_time=1.273e-04, forward_time=0.147, loss_ctc=67.037, loss_att=51.153, acc=0.710, loss=55.918, backward_time=1.031, grad_norm=135.297, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.724 +[gpub001:0/64] 2023-07-14 21:55:11,269 (trainer:732) INFO: 50epoch:train:7201-7300batch: iter_time=1.392e-04, forward_time=0.146, loss_ctc=64.625, loss_att=47.415, acc=0.716, loss=52.578, backward_time=1.028, grad_norm=138.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.714 +[gpub001:0/64] 2023-07-14 21:57:27,032 (trainer:732) INFO: 50epoch:train:7301-7400batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=67.172, loss_att=45.670, acc=0.720, loss=52.120, backward_time=1.027, grad_norm=138.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.715 +[gpub001:0/64] 2023-07-14 21:59:42,685 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-14 22:00:00,870 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-14 22:00:04,274 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 22:04:14,311 (trainer:732) INFO: 50epoch:train:7401-7500batch: iter_time=1.298, forward_time=0.174, loss_ctc=72.785, loss_att=54.934, acc=0.706, loss=60.289, backward_time=1.034, grad_norm=127.076, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.064e-05, train_time=8.145 +[gpub001:0/64] 2023-07-14 22:06:32,396 (trainer:732) INFO: 50epoch:train:7501-7600batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=75.267, loss_att=55.281, acc=0.706, loss=61.277, backward_time=1.033, grad_norm=127.032, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.064e-05, train_time=2.761 +[gpub001:0/64] 2023-07-14 22:08:49,298 (trainer:732) INFO: 50epoch:train:7601-7700batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=61.713, loss_att=43.430, acc=0.731, loss=48.915, backward_time=1.029, grad_norm=113.236, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.738 +[gpub001:0/64] 2023-07-14 22:11:05,012 (trainer:732) INFO: 50epoch:train:7701-7800batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=64.435, loss_att=51.314, acc=0.720, loss=55.251, backward_time=1.025, grad_norm=137.487, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.714 +[gpub001:0/64] 2023-07-14 22:13:20,529 (trainer:732) INFO: 
50epoch:train:7801-7900batch: iter_time=1.071e-04, forward_time=0.145, loss_ctc=67.719, loss_att=45.921, acc=0.729, loss=52.460, backward_time=1.026, grad_norm=116.618, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.710 +[gpub001:0/64] 2023-07-14 22:15:36,651 (trainer:732) INFO: 50epoch:train:7901-8000batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=74.668, loss_att=55.808, acc=0.718, loss=61.466, backward_time=1.030, grad_norm=138.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.722 +[gpub001:0/64] 2023-07-14 22:17:52,426 (trainer:732) INFO: 50epoch:train:8001-8100batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=64.712, loss_att=49.154, acc=0.717, loss=53.822, backward_time=1.027, grad_norm=134.088, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.715 +[gpub001:0/64] 2023-07-14 22:20:08,073 (trainer:732) INFO: 50epoch:train:8101-8200batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=62.415, loss_att=45.458, acc=0.725, loss=50.545, backward_time=1.026, grad_norm=104.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.713 +[gpub001:0/64] 2023-07-14 22:22:24,409 (trainer:732) INFO: 50epoch:train:8201-8300batch: iter_time=1.217e-04, forward_time=0.144, loss_ctc=69.803, loss_att=47.319, acc=0.716, loss=54.064, backward_time=1.024, grad_norm=120.199, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.060e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 22:23:27,929 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-14 22:23:46,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-14 22:23:49,971 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 22:29:23,107 (trainer:732) INFO: 50epoch:train:8301-8400batch: iter_time=1.937, forward_time=0.171, loss_ctc=73.351, loss_att=54.696, acc=0.704, loss=60.292, backward_time=1.042, grad_norm=126.226, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.060e-05, train_time=8.373 +[gpub001:0/64] 2023-07-14 22:31:39,375 (trainer:732) INFO: 50epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=65.360, loss_att=47.307, acc=0.724, loss=52.723, backward_time=1.028, grad_norm=106.196, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.059e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 22:33:56,198 (trainer:732) INFO: 50epoch:train:8501-8600batch: iter_time=1.121e-04, forward_time=0.145, loss_ctc=62.356, loss_att=47.082, acc=0.731, loss=51.664, backward_time=1.028, grad_norm=116.896, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.736 +[gpub001:0/64] 2023-07-14 22:36:12,773 (trainer:732) INFO: 50epoch:train:8601-8700batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.928, loss_att=48.566, acc=0.721, loss=54.975, backward_time=1.028, grad_norm=123.106, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.731 +[gpub001:0/64] 2023-07-14 22:38:28,734 (trainer:732) INFO: 50epoch:train:8701-8800batch: iter_time=1.093e-04, forward_time=0.145, loss_ctc=73.339, loss_att=55.181, acc=0.711, loss=60.628, backward_time=1.027, grad_norm=118.978, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.057e-05, train_time=2.719 +[gpub001:0/64] 2023-07-14 22:40:44,531 (trainer:732) INFO: 50epoch:train:8801-8900batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=63.033, loss_att=48.849, acc=0.707, loss=53.104, backward_time=1.028, grad_norm=132.965, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.057e-05, train_time=2.716 +[gpub001:0/64] 2023-07-14 22:42:59,903 (trainer:732) INFO: 50epoch:train:8901-9000batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=64.413, loss_att=46.307, acc=0.729, loss=51.739, backward_time=1.026, grad_norm=96.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.707 +[gpub001:0/64] 2023-07-14 22:45:18,107 (trainer:732) INFO: 50epoch:train:9001-9100batch: iter_time=1.165e-04, forward_time=0.147, loss_ctc=69.114, loss_att=45.854, acc=0.719, loss=52.832, backward_time=1.032, grad_norm=140.806, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.764 +[gpub001:0/64] 2023-07-14 22:46:49,602 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub001:0/64] 2023-07-14 22:47:07,559 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-14 22:47:11,050 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 22:52:32,878 (trainer:732) INFO: 50epoch:train:9101-9200batch: iter_time=2.016, forward_time=0.145, loss_ctc=79.028, loss_att=57.548, acc=0.699, loss=63.992, backward_time=1.040, grad_norm=136.426, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.055e-05, train_time=8.695 +[gpub001:0/64] 2023-07-14 22:54:49,816 (trainer:732) INFO: 50epoch:train:9201-9300batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=62.727, loss_att=47.586, acc=0.737, loss=52.129, backward_time=1.032, grad_norm=123.258, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.055e-05, train_time=2.739 +[gpub001:0/64] 2023-07-14 22:57:06,559 (trainer:732) INFO: 50epoch:train:9301-9400batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=64.256, loss_att=48.937, acc=0.734, loss=53.533, backward_time=1.029, grad_norm=116.732, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.735 +[gpub001:0/64] 2023-07-14 22:59:22,627 (trainer:732) INFO: 50epoch:train:9401-9500batch: iter_time=1.020e-04, forward_time=0.144, loss_ctc=68.645, loss_att=49.152, acc=0.732, loss=55.000, backward_time=1.026, grad_norm=126.326, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.721 +[gpub001:0/64] 2023-07-14 23:01:39,743 (trainer:732) INFO: 50epoch:train:9501-9600batch: iter_time=1.028e-04, forward_time=0.146, loss_ctc=71.460, loss_att=50.831, acc=0.729, loss=57.020, backward_time=1.030, grad_norm=125.820, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.053e-05, train_time=2.742 +[gpub001:0/64] 2023-07-14 23:03:56,039 (trainer:732) INFO: 50epoch:train:9601-9700batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=65.840, loss_att=52.027, acc=0.718, loss=56.171, backward_time=1.029, grad_norm=137.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.053e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 23:06:11,688 (trainer:732) INFO: 50epoch:train:9701-9800batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=65.045, loss_att=48.007, acc=0.726, loss=53.118, backward_time=1.026, grad_norm=116.113, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.052e-05, train_time=2.713 +[gpub001:0/64] 2023-07-14 23:08:27,117 (trainer:732) INFO: 50epoch:train:9801-9900batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=69.175, loss_att=45.588, acc=0.730, loss=52.664, backward_time=1.025, grad_norm=139.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.052e-05, train_time=2.708 +[gpub001:0/64] 2023-07-14 23:10:42,703 (trainer:732) INFO: 50epoch:train:9901-10000batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=72.841, loss_att=53.971, acc=0.715, loss=59.632, backward_time=1.025, grad_norm=123.765, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.051e-05, train_time=2.711 +[gpub001:0/64] 2023-07-14 23:24:09,245 (trainer:338) INFO: 50epoch results: [train] iter_time=0.179, forward_time=0.148, loss_ctc=68.583, loss_att=49.901, acc=0.720, loss=55.506, backward_time=1.032, grad_norm=127.995, clip=100.000, loss_scale=3.121e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=3.329, time=4 hours, 37 minutes and 40.49 seconds, total_count=470000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.429, cer_ctc=0.252, loss_att=37.607, acc=0.676, cer=0.417, wer=0.998, loss=39.054, time=7 minutes and 17.28 seconds, total_count=48070, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 52.99 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-14 23:24:24,757 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-14 23:24:24,796 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till50epoch.pth +[gpub001:0/64] 2023-07-14 23:25:13,113 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till50epoch.pth +[gpub001:0/64] 2023-07-14 23:25:53,416 (trainer:272) INFO: 51/60epoch started. Estimated time to finish: 2 days, 1 hour and 33 minutes +[gpub001:0/64] 2023-07-14 23:25:55,441 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/64] 2023-07-14 23:26:15,006 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 23:26:18,855 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 23:26:18,856 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-14 23:26:18,921 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 23:36:35,831 (trainer:732) INFO: 51epoch:train:1-100batch: iter_time=4.981, forward_time=0.181, loss_ctc=71.923, loss_att=55.595, acc=0.703, loss=60.494, backward_time=1.041, grad_norm=115.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.051e-05, train_time=12.824 +[gpub001:0/64] 2023-07-14 23:38:52,739 (trainer:732) INFO: 51epoch:train:101-200batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=79.235, loss_att=65.934, acc=0.698, loss=69.924, backward_time=1.031, grad_norm=121.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.738 +[gpub001:0/64] 2023-07-14 23:41:08,953 (trainer:732) INFO: 51epoch:train:201-300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=69.961, loss_att=49.731, acc=0.710, loss=55.800, backward_time=1.029, grad_norm=132.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.724 +[gpub001:0/64] 2023-07-14 23:43:26,233 (trainer:732) INFO: 51epoch:train:301-400batch: iter_time=1.131e-04, forward_time=0.144, loss_ctc=64.152, loss_att=48.408, acc=0.691, loss=53.132, backward_time=1.026, grad_norm=128.225, clip=100.000, loss_scale=2.369e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.745 +[gpub001:0/64] 2023-07-14 23:45:41,876 (trainer:732) INFO: 51epoch:train:401-500batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=72.180, loss_att=50.535, acc=0.705, loss=57.029, backward_time=1.028, grad_norm=153.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.713 +[gpub001:0/64] 2023-07-14 23:48:00,509 (trainer:732) INFO: 51epoch:train:501-600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=81.916, loss_att=67.146, acc=0.698, loss=71.577, backward_time=1.036, grad_norm=135.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.772 +[gpub001:0/64] 2023-07-14 23:50:17,111 (trainer:732) INFO: 51epoch:train:601-700batch: iter_time=1.289e-04, forward_time=0.145, loss_ctc=62.275, loss_att=44.773, acc=0.726, loss=50.023, backward_time=1.029, grad_norm=104.898, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.732 +[gpub001:0/64] 2023-07-14 23:52:33,852 (trainer:732) INFO: 51epoch:train:701-800batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=74.632, loss_att=56.174, acc=0.688, loss=61.711, backward_time=1.027, grad_norm=120.912, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.047e-05, 
train_time=2.735 +[gpub001:0/64] 2023-07-14 23:53:29,332 (multiple_iter_factory:32) INFO: Building 1st iter-factory... +[gpub001:0/64] 2023-07-14 23:53:47,988 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 23:53:51,397 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 23:53:51,398 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-14 23:53:51,404 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 00:02:44,151 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub001:0/64] 2023-07-15 00:03:11,538 (trainer:732) INFO: 51epoch:train:801-900batch: iter_time=4.917, forward_time=0.176, loss_ctc=75.023, loss_att=61.940, acc=0.702, loss=65.865, backward_time=1.043, grad_norm=135.587, clip=100.000, loss_scale=2.914e+32, optim_step_time=0.183, optim0_lr0=5.047e-05, train_time=12.754 +[gpub001:0/64] 2023-07-15 00:05:28,614 (trainer:732) INFO: 51epoch:train:901-1000batch: iter_time=1.399e-04, forward_time=0.146, loss_ctc=77.022, loss_att=61.388, acc=0.701, loss=66.078, backward_time=1.031, grad_norm=116.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 00:07:45,430 (trainer:732) INFO: 51epoch:train:1001-1100batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=72.448, loss_att=57.061, acc=0.706, loss=61.677, backward_time=1.029, grad_norm=118.194, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 00:10:00,792 (trainer:732) INFO: 51epoch:train:1101-1200batch: iter_time=1.132e-04, forward_time=0.143, loss_ctc=63.641, loss_att=47.528, acc=0.704, loss=52.362, backward_time=1.025, grad_norm=122.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.045e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 00:12:16,409 (trainer:732) INFO: 51epoch:train:1201-1300batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=64.509, loss_att=47.066, acc=0.702, loss=52.299, backward_time=1.025, grad_norm=124.287, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.045e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 00:14:31,911 (trainer:732) INFO: 51epoch:train:1301-1400batch: iter_time=1.155e-04, forward_time=0.143, loss_ctc=81.052, loss_att=63.313, acc=0.697, loss=68.635, backward_time=1.025, grad_norm=161.228, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 00:16:47,763 (trainer:732) INFO: 51epoch:train:1401-1500batch: iter_time=1.052e-04, forward_time=0.144, loss_ctc=72.270, loss_att=53.340, acc=0.717, loss=59.019, backward_time=1.028, grad_norm=137.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 00:19:03,111 (trainer:732) INFO: 
51epoch:train:1501-1600batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=70.162, loss_att=50.618, acc=0.706, loss=56.481, backward_time=1.025, grad_norm=170.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.043e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 00:20:39,237 (multiple_iter_factory:32) INFO: Building 2nd iter-factory... +[gpub001:0/64] 2023-07-15 00:20:57,151 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 00:21:00,582 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 00:24:56,757 (trainer:732) INFO: 51epoch:train:1601-1700batch: iter_time=1.325, forward_time=0.146, loss_ctc=74.706, loss_att=64.628, acc=0.698, loss=67.651, backward_time=1.039, grad_norm=142.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.043e-05, train_time=7.073 +[gpub001:0/64] 2023-07-15 00:27:14,907 (trainer:732) INFO: 51epoch:train:1701-1800batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=75.949, loss_att=63.359, acc=0.712, loss=67.136, backward_time=1.038, grad_norm=127.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.042e-05, train_time=2.763 +[gpub001:0/64] 2023-07-15 00:29:30,975 (trainer:732) INFO: 51epoch:train:1801-1900batch: iter_time=1.016e-04, forward_time=0.146, loss_ctc=69.913, loss_att=50.332, acc=0.719, loss=56.206, backward_time=1.031, grad_norm=110.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.041e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 00:31:46,677 (trainer:732) INFO: 51epoch:train:1901-2000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.394, loss_att=48.413, acc=0.709, loss=53.208, backward_time=1.028, grad_norm=121.998, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.041e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 00:34:02,438 (trainer:732) INFO: 51epoch:train:2001-2100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=66.797, loss_att=48.245, acc=0.712, loss=53.810, backward_time=1.029, grad_norm=124.842, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.040e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 00:36:19,042 (trainer:732) INFO: 51epoch:train:2101-2200batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=83.907, loss_att=64.979, acc=0.707, loss=70.657, backward_time=1.034, grad_norm=153.393, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.040e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 00:38:36,795 (trainer:732) INFO: 51epoch:train:2201-2300batch: iter_time=1.176e-04, forward_time=0.146, loss_ctc=64.525, loss_att=48.786, acc=0.723, loss=53.508, backward_time=1.030, grad_norm=103.031, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.755 +[gpub001:0/64] 2023-07-15 00:40:52,543 (trainer:732) INFO: 51epoch:train:2301-2400batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=72.083, loss_att=51.081, acc=0.715, loss=57.381, backward_time=1.026, grad_norm=151.523, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 00:43:19,481 (trainer:732) INFO: 51epoch:train:2401-2500batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.723, loss_att=56.251, acc=0.711, loss=60.593, backward_time=1.041, grad_norm=144.872, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=2.939 +[gpub001:0/64] 2023-07-15 00:43:21,090 (multiple_iter_factory:32) INFO: Building 3rd iter-factory... +[gpub001:0/64] 2023-07-15 00:43:39,291 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 00:43:42,732 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 00:43:42,733 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub001:0/64] 2023-07-15 00:43:42,739 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 00:49:10,741 (trainer:732) INFO: 51epoch:train:2501-2600batch: iter_time=1.293, forward_time=0.188, loss_ctc=72.326, loss_att=55.154, acc=0.706, loss=60.305, backward_time=1.054, grad_norm=150.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=7.025 +[gpub001:0/64] 2023-07-15 00:51:36,074 (trainer:732) INFO: 51epoch:train:2601-2700batch: iter_time=1.215e-04, forward_time=0.149, loss_ctc=78.155, loss_att=65.045, acc=0.701, loss=68.978, backward_time=1.038, grad_norm=133.517, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=2.907 +[gpub001:0/64] 2023-07-15 00:54:08,484 (trainer:732) INFO: 51epoch:train:2701-2800batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=68.157, loss_att=49.050, acc=0.717, loss=54.782, backward_time=1.051, grad_norm=120.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=3.048 +[gpub001:0/64] 2023-07-15 00:56:50,168 (trainer:732) INFO: 51epoch:train:2801-2900batch: iter_time=1.083e-04, forward_time=0.145, loss_ctc=61.833, loss_att=46.996, acc=0.700, loss=51.447, backward_time=1.048, grad_norm=110.089, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.233 +[gpub001:0/64] 2023-07-15 00:59:24,641 (trainer:732) INFO: 51epoch:train:2901-3000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=74.094, loss_att=49.328, acc=0.708, loss=56.758, backward_time=1.046, grad_norm=137.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.089 +[gpub001:0/64] 2023-07-15 01:01:53,489 (trainer:732) INFO: 51epoch:train:3001-3100batch: iter_time=1.135e-04, 
forward_time=0.145, loss_ctc=81.243, loss_att=67.309, acc=0.698, loss=71.489, backward_time=1.042, grad_norm=131.285, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=2.977 +[gpub001:0/64] 2023-07-15 01:04:27,494 (trainer:732) INFO: 51epoch:train:3101-3200batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=61.668, loss_att=44.652, acc=0.728, loss=49.757, backward_time=1.047, grad_norm=103.749, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=3.080 +[gpub001:0/64] 2023-07-15 01:06:55,343 (trainer:732) INFO: 51epoch:train:3201-3300batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=72.274, loss_att=53.996, acc=0.696, loss=59.479, backward_time=1.037, grad_norm=125.051, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=2.957 +[gpub001:0/64] 2023-07-15 01:08:01,140 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 01:08:19,465 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 01:08:22,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 01:13:05,704 (trainer:732) INFO: 51epoch:train:3301-3400batch: iter_time=2.009, forward_time=0.148, loss_ctc=75.223, loss_att=63.691, acc=0.710, loss=67.150, backward_time=1.051, grad_norm=142.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=7.407 +[gpub001:0/64] 2023-07-15 01:15:22,767 (trainer:732) INFO: 51epoch:train:3401-3500batch: iter_time=1.216e-04, forward_time=0.147, loss_ctc=74.723, loss_att=61.360, acc=0.699, loss=65.369, backward_time=1.033, grad_norm=144.345, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 01:17:38,745 (trainer:732) INFO: 51epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.675, loss_att=49.439, acc=0.718, loss=54.310, backward_time=1.028, grad_norm=123.402, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 01:19:54,187 (trainer:732) INFO: 51epoch:train:3601-3700batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=60.673, loss_att=46.051, acc=0.693, loss=50.437, backward_time=1.026, grad_norm=100.622, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 01:22:10,043 (trainer:732) INFO: 51epoch:train:3701-3800batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=77.507, loss_att=54.217, acc=0.713, loss=61.204, backward_time=1.029, grad_norm=155.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, 
train_time=2.717 +[gpub001:0/64] 2023-07-15 01:24:26,099 (trainer:732) INFO: 51epoch:train:3801-3900batch: iter_time=1.260e-04, forward_time=0.147, loss_ctc=77.820, loss_att=62.731, acc=0.702, loss=67.257, backward_time=1.031, grad_norm=134.212, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 01:26:42,048 (trainer:732) INFO: 51epoch:train:3901-4000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=64.291, loss_att=46.914, acc=0.712, loss=52.127, backward_time=1.028, grad_norm=125.741, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 01:28:57,962 (trainer:732) INFO: 51epoch:train:4001-4100batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.662, loss_att=55.242, acc=0.705, loss=59.868, backward_time=1.027, grad_norm=125.153, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.030e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 01:30:40,640 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 01:30:58,549 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 01:31:02,016 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 01:31:02,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 01:31:02,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 01:35:33,364 (trainer:732) INFO: 51epoch:train:4101-4200batch: iter_time=1.388, forward_time=0.180, loss_ctc=68.988, loss_att=53.972, acc=0.717, loss=58.477, backward_time=1.042, grad_norm=120.989, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.030e-05, train_time=7.908 +[gpub001:0/64] 2023-07-15 01:37:51,985 (trainer:732) INFO: 51epoch:train:4201-4300batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=74.833, loss_att=60.854, acc=0.715, loss=65.048, backward_time=1.037, grad_norm=133.761, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.772 +[gpub001:0/64] 2023-07-15 01:40:08,097 (trainer:732) INFO: 51epoch:train:4301-4400batch: iter_time=1.075e-04, forward_time=0.146, loss_ctc=69.438, loss_att=52.734, acc=0.721, loss=57.745, backward_time=1.029, grad_norm=123.930, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 01:42:25,511 (trainer:732) INFO: 51epoch:train:4401-4500batch: iter_time=1.121e-04, forward_time=0.155, loss_ctc=63.839, loss_att=48.472, acc=0.713, loss=53.082, backward_time=1.032, grad_norm=131.811, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.748 +[gpub001:0/64] 2023-07-15 01:44:40,951 (trainer:732) INFO: 51epoch:train:4501-4600batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=66.389, loss_att=47.906, acc=0.715, 
loss=53.451, backward_time=1.026, grad_norm=153.232, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 01:46:57,089 (trainer:732) INFO: 51epoch:train:4601-4700batch: iter_time=1.045e-04, forward_time=0.145, loss_ctc=83.957, loss_att=64.618, acc=0.707, loss=70.420, backward_time=1.030, grad_norm=166.949, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 01:49:12,741 (trainer:732) INFO: 51epoch:train:4701-4800batch: iter_time=9.881e-05, forward_time=0.145, loss_ctc=64.633, loss_att=48.052, acc=0.725, loss=53.027, backward_time=1.029, grad_norm=122.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 01:51:28,305 (trainer:732) INFO: 51epoch:train:4801-4900batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.030, loss_att=50.159, acc=0.722, loss=55.820, backward_time=1.027, grad_norm=117.971, clip=100.000, loss_scale=1.947e+32, optim_step_time=0.182, optim0_lr0=5.026e-05, train_time=2.711 +[gpub001:0/64] 2023-07-15 01:53:45,279 (trainer:732) INFO: 51epoch:train:4901-5000batch: iter_time=1.268e-04, forward_time=0.153, loss_ctc=70.500, loss_att=57.120, acc=0.709, loss=61.134, backward_time=1.030, grad_norm=119.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.026e-05, train_time=2.739 +[gpub001:0/64] 2023-07-15 01:53:49,960 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub001:0/64] 2023-07-15 01:54:07,620 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 01:54:11,028 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:00:36,494 (trainer:732) INFO: 51epoch:train:5001-5100batch: iter_time=1.321, forward_time=0.197, loss_ctc=71.796, loss_att=54.624, acc=0.720, loss=59.775, backward_time=1.042, grad_norm=142.285, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.025e-05, train_time=8.225 +[gpub001:0/64] 2023-07-15 02:02:53,486 (trainer:732) INFO: 51epoch:train:5101-5200batch: iter_time=1.192e-04, forward_time=0.148, loss_ctc=77.425, loss_att=64.117, acc=0.711, loss=68.109, backward_time=1.031, grad_norm=121.077, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 02:05:14,153 (trainer:732) INFO: 51epoch:train:5201-5300batch: iter_time=2.312e-04, forward_time=0.185, loss_ctc=68.549, loss_att=49.844, acc=0.725, loss=55.455, backward_time=1.034, grad_norm=126.463, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.024e-05, train_time=2.813 +[gpub001:0/64] 2023-07-15 02:07:30,780 
(trainer:732) INFO: 51epoch:train:5301-5400batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=61.261, loss_att=47.446, acc=0.706, loss=51.590, backward_time=1.030, grad_norm=122.414, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.024e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 02:09:46,510 (trainer:732) INFO: 51epoch:train:5401-5500batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=71.938, loss_att=49.005, acc=0.717, loss=55.885, backward_time=1.027, grad_norm=145.363, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 02:12:02,889 (trainer:732) INFO: 51epoch:train:5501-5600batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=80.684, loss_att=65.569, acc=0.713, loss=70.103, backward_time=1.031, grad_norm=154.669, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 02:14:18,639 (trainer:732) INFO: 51epoch:train:5601-5700batch: iter_time=1.232e-04, forward_time=0.145, loss_ctc=60.971, loss_att=43.444, acc=0.734, loss=48.702, backward_time=1.028, grad_norm=110.491, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.022e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 02:16:42,195 (trainer:732) INFO: 51epoch:train:5701-5800batch: iter_time=1.226e-04, forward_time=0.206, loss_ctc=71.178, loss_att=52.973, acc=0.713, loss=58.434, backward_time=1.033, grad_norm=126.966, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.022e-05, train_time=2.870 +[gpub001:0/64] 2023-07-15 02:17:48,783 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 02:18:06,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 02:18:10,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:24:23,285 (trainer:732) INFO: 51epoch:train:5801-5900batch: iter_time=3.167, forward_time=0.193, loss_ctc=73.954, loss_att=58.719, acc=0.720, loss=63.289, backward_time=1.046, grad_norm=120.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.021e-05, train_time=9.221 +[gpub001:0/64] 2023-07-15 02:26:42,799 (trainer:732) INFO: 51epoch:train:5901-6000batch: iter_time=1.365e-04, forward_time=0.148, loss_ctc=77.878, loss_att=59.660, acc=0.724, loss=65.125, backward_time=1.031, grad_norm=137.273, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.021e-05, train_time=2.791 +[gpub001:0/64] 2023-07-15 02:29:01,054 (trainer:732) INFO: 51epoch:train:6001-6100batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=71.477, loss_att=57.056, acc=0.717, loss=61.382, backward_time=1.032, grad_norm=132.136, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.020e-05, train_time=2.765 +[gpub001:0/64] 2023-07-15 02:31:22,810 (trainer:732) INFO: 51epoch:train:6101-6200batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=63.470, loss_att=47.131, acc=0.713, loss=52.033, backward_time=1.043, grad_norm=145.338, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.020e-05, train_time=2.835 +[gpub001:0/64] 2023-07-15 02:33:49,092 (trainer:732) INFO: 51epoch:train:6201-6300batch: iter_time=1.456e-04, forward_time=0.146, loss_ctc=63.174, loss_att=46.054, acc=0.716, loss=51.190, backward_time=1.040, grad_norm=140.173, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.019e-05, train_time=2.925 +[gpub001:0/64] 2023-07-15 02:36:09,198 (trainer:732) INFO: 51epoch:train:6301-6400batch: iter_time=9.504e-05, forward_time=0.145, loss_ctc=81.068, loss_att=60.989, acc=0.711, loss=67.012, backward_time=1.039, grad_norm=146.913, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.019e-05, train_time=2.802 +[gpub001:0/64] 2023-07-15 02:38:28,073 (trainer:732) INFO: 51epoch:train:6401-6500batch: iter_time=9.582e-05, forward_time=0.145, loss_ctc=71.425, loss_att=53.746, acc=0.727, loss=59.050, backward_time=1.032, grad_norm=128.568, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.018e-05, train_time=2.777 +[gpub001:0/64] 2023-07-15 02:40:43,948 (trainer:732) INFO: 51epoch:train:6501-6600batch: iter_time=1.048e-04, forward_time=0.144, loss_ctc=67.003, loss_att=49.566, acc=0.718, loss=54.797, backward_time=1.026, grad_norm=141.281, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.018e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 02:42:32,538 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/64] 2023-07-15 02:42:50,762 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 02:42:54,192 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:47:36,979 (trainer:732) INFO: 51epoch:train:6601-6700batch: iter_time=1.585, forward_time=0.162, loss_ctc=70.032, loss_att=58.956, acc=0.708, loss=62.279, backward_time=1.037, grad_norm=139.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.017e-05, train_time=8.260 +[gpub001:0/64] 2023-07-15 02:49:54,229 (trainer:732) INFO: 51epoch:train:6701-6800batch: iter_time=1.289e-04, forward_time=0.147, loss_ctc=75.545, loss_att=58.495, acc=0.711, loss=63.610, backward_time=1.033, grad_norm=141.681, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.017e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 02:52:10,761 (trainer:732) INFO: 51epoch:train:6801-6900batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=74.367, loss_att=61.655, acc=0.708, loss=65.468, backward_time=1.029, grad_norm=128.279, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 02:54:26,812 (trainer:732) INFO: 51epoch:train:6901-7000batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=65.571, loss_att=49.989, acc=0.708, loss=54.664, backward_time=1.028, grad_norm=112.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 02:56:42,153 (trainer:732) INFO: 51epoch:train:7001-7100batch: iter_time=1.037e-04, forward_time=0.144, loss_ctc=56.492, loss_att=43.028, acc=0.701, loss=47.067, backward_time=1.025, grad_norm=116.572, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 02:58:58,134 (trainer:732) INFO: 51epoch:train:7101-7200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.088, loss_att=59.885, acc=0.716, loss=67.146, backward_time=1.030, grad_norm=159.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 03:01:13,992 (trainer:732) INFO: 51epoch:train:7201-7300batch: iter_time=1.333e-04, forward_time=0.145, loss_ctc=72.488, loss_att=55.906, acc=0.708, loss=60.881, backward_time=1.029, grad_norm=126.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.014e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 03:03:29,592 (trainer:732) INFO: 51epoch:train:7301-7400batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=67.910, loss_att=51.640, acc=0.710, loss=56.521, backward_time=1.028, grad_norm=112.939, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.014e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:05:45,812 (trainer:732) INFO: 51epoch:train:7401-7500batch: iter_time=9.587e-05, forward_time=0.146, loss_ctc=69.895, loss_att=58.438, acc=0.703, loss=61.875, backward_time=1.030, grad_norm=112.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 03:06:00,836 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 03:06:19,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:06:22,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:06:22,593 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 03:06:22,599 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:12:16,439 (trainer:732) INFO: 51epoch:train:7501-7600batch: iter_time=2.393, forward_time=0.145, loss_ctc=71.969, loss_att=54.560, acc=0.721, loss=59.782, backward_time=1.046, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=7.812 +[gpub001:0/64] 2023-07-15 03:14:33,471 (trainer:732) INFO: 51epoch:train:7601-7700batch: iter_time=1.093e-04, forward_time=0.147, loss_ctc=76.969, loss_att=62.660, acc=0.719, loss=66.953, backward_time=1.033, grad_norm=136.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 03:16:50,003 (trainer:732) INFO: 51epoch:train:7701-7800batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=67.626, loss_att=48.666, acc=0.727, loss=54.354, backward_time=1.031, grad_norm=124.562, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 03:19:06,142 (trainer:732) INFO: 51epoch:train:7801-7900batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=61.458, loss_att=47.781, acc=0.708, loss=51.884, backward_time=1.029, grad_norm=132.908, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 03:21:21,748 (trainer:732) INFO: 51epoch:train:7901-8000batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=73.377, loss_att=49.099, acc=0.720, loss=56.382, backward_time=1.026, grad_norm=145.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:23:41,893 (trainer:732) INFO: 51epoch:train:8001-8100batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=80.253, loss_att=65.949, acc=0.710, loss=70.240, backward_time=1.043, grad_norm=118.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.803 +[gpub001:0/64] 2023-07-15 03:25:57,929 (trainer:732) INFO: 51epoch:train:8101-8200batch: iter_time=1.161e-04, forward_time=0.146, loss_ctc=59.833, 
loss_att=43.945, acc=0.737, loss=48.712, backward_time=1.028, grad_norm=97.269, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 03:28:22,509 (trainer:732) INFO: 51epoch:train:8201-8300batch: iter_time=4.203e-04, forward_time=0.206, loss_ctc=70.476, loss_att=52.382, acc=0.712, loss=57.810, backward_time=1.036, grad_norm=124.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.009e-05, train_time=2.891 +[gpub001:0/64] 2023-07-15 03:29:12,660 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 03:29:30,661 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 03:29:34,364 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:34:24,246 (trainer:732) INFO: 51epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.183, loss_ctc=74.757, loss_att=62.043, acc=0.719, loss=65.857, backward_time=1.043, grad_norm=136.488, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.009e-05, train_time=7.234 +[gpub001:0/64] 2023-07-15 03:36:41,508 (trainer:732) INFO: 51epoch:train:8401-8500batch: iter_time=9.487e-05, forward_time=0.147, loss_ctc=73.152, loss_att=60.272, acc=0.703, loss=64.136, backward_time=1.032, grad_norm=130.904, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 03:38:58,154 (trainer:732) INFO: 51epoch:train:8501-8600batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=66.048, loss_att=49.450, acc=0.717, loss=54.429, backward_time=1.029, grad_norm=115.954, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 03:41:13,765 (trainer:732) INFO: 51epoch:train:8601-8700batch: iter_time=9.860e-05, forward_time=0.143, loss_ctc=60.580, loss_att=45.944, acc=0.699, loss=50.334, backward_time=1.024, grad_norm=132.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.007e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:43:30,063 (trainer:732) INFO: 51epoch:train:8701-8800batch: iter_time=1.299e-04, forward_time=0.147, loss_ctc=75.928, loss_att=53.765, acc=0.716, loss=60.414, backward_time=1.030, grad_norm=155.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 03:45:21,567 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
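The WARNING just above ("The grad norm is nan. Skipping updating the model.") is the standard fp16 dynamic-loss-scaling pattern: an overflow surfaces as a non-finite gradient norm, the optimizer step is skipped, and the scale is halved before training continues (visible later in this log when loss_scale drops from 3.245e+32 to 1.623e+32). A minimal sketch of that control flow using PyTorch's GradScaler, not ESPnet's actual trainer code; the max_norm of 100 is an assumption read off the constant clip=100.000 column:

```python
import torch

scaler = torch.cuda.amp.GradScaler()  # dynamic loss scaling

def train_step(model, optimizer, loss, max_norm=100.0):
    """Skip the parameter update when the unscaled grad norm is non-finite (sketch)."""
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # gradients back in real (unscaled) units
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if not torch.isfinite(grad_norm):
        # Mirrors the log's warning; the parameters are left untouched.
        print("The grad norm is nan. Skipping updating the model.")
    else:
        scaler.step(optimizer)
    scaler.update()  # halves the scale after an overflow, grows it otherwise
```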
+[gpub001:0/64] 2023-07-15 03:45:45,981 (trainer:732) INFO: 51epoch:train:8801-8900batch: iter_time=1.266e-04, forward_time=0.147, loss_ctc=77.868, loss_att=61.551, acc=0.707, loss=66.446, backward_time=1.029, grad_norm=134.524, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.006e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 03:48:03,191 (trainer:732) INFO: 51epoch:train:8901-9000batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=64.524, loss_att=46.573, acc=0.715, loss=51.958, backward_time=1.029, grad_norm=138.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.744 +[gpub001:0/64] 2023-07-15 03:50:20,560 (trainer:732) INFO: 51epoch:train:9001-9100batch: iter_time=1.038e-04, forward_time=0.146, loss_ctc=70.040, loss_att=54.802, acc=0.706, loss=59.373, backward_time=1.032, grad_norm=151.464, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=2.747 +[gpub001:0/64] 2023-07-15 03:51:53,225 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub001:0/64] 2023-07-15 03:52:11,289 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub001:0/64] 2023-07-15 03:52:14,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:57:27,451 (trainer:732) INFO: 51epoch:train:9101-9200batch: iter_time=1.306, forward_time=0.144, loss_ctc=65.711, loss_att=50.921, acc=0.714, loss=55.358, backward_time=1.041, grad_norm=118.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=8.538 +[gpub001:0/64] 2023-07-15 03:59:49,919 (trainer:732) INFO: 51epoch:train:9201-9300batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.976, loss_att=57.729, acc=0.724, loss=63.203, backward_time=1.042, grad_norm=148.716, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.004e-05, train_time=2.849 +[gpub001:0/64] 2023-07-15 04:02:07,659 (trainer:732) INFO: 51epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.148, loss_ctc=74.558, loss_att=59.518, acc=0.719, loss=64.030, backward_time=1.034, grad_norm=124.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.755 +[gpub001:0/64] 2023-07-15 04:04:27,052 (trainer:732) INFO: 51epoch:train:9401-9500batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.717, loss_att=49.875, acc=0.718, loss=54.328, backward_time=1.038, grad_norm=132.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.788 +[gpub001:0/64] 2023-07-15 04:06:45,446 (trainer:732) INFO: 51epoch:train:9501-9600batch: iter_time=9.616e-05, forward_time=0.145, loss_ctc=55.849, loss_att=42.233, acc=0.712, loss=46.317, 
backward_time=1.030, grad_norm=119.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.768 +[gpub001:0/64] 2023-07-15 04:09:01,853 (trainer:732) INFO: 51epoch:train:9601-9700batch: iter_time=1.058e-04, forward_time=0.147, loss_ctc=82.299, loss_att=58.433, acc=0.727, loss=65.593, backward_time=1.032, grad_norm=158.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.728 +[gpub001:0/64] 2023-07-15 04:11:21,471 (trainer:732) INFO: 51epoch:train:9701-9800batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=73.290, loss_att=56.427, acc=0.712, loss=61.486, backward_time=1.036, grad_norm=130.108, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.792 +[gpub001:0/64] 2023-07-15 04:13:37,214 (trainer:732) INFO: 51epoch:train:9801-9900batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=67.601, loss_att=50.730, acc=0.720, loss=55.791, backward_time=1.028, grad_norm=117.358, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 04:15:53,312 (trainer:732) INFO: 51epoch:train:9901-10000batch: iter_time=9.479e-05, forward_time=0.147, loss_ctc=68.884, loss_att=56.769, acc=0.722, loss=60.403, backward_time=1.029, grad_norm=111.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.000e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 04:28:17,825 (trainer:338) INFO: 51epoch results: [train] iter_time=0.271, forward_time=0.150, loss_ctc=70.867, loss_att=54.317, acc=0.712, loss=59.282, backward_time=1.033, grad_norm=130.980, clip=100.000, loss_scale=2.538e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=3.480, time=4 hours, 50 minutes and 9.54 seconds, total_count=480000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.872, cer_ctc=0.251, loss_att=38.738, acc=0.678, cer=0.404, wer=0.996, loss=39.978, time=6 minutes and 25.08 seconds, total_count=49082, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-15 04:28:33,383 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/46epoch.pth +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:272) INFO: 52/60epoch started. Estimated time to finish: 1 day, 20 hours and 51 minutes +[gpub001:0/64] 2023-07-15 04:28:33,399 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
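The epoch-51 summary above makes the loss bookkeeping easy to check: every logged `loss` is a 0.3/0.7 blend of `loss_ctc` and `loss_att` (0.3 x 70.867 + 0.7 x 54.317 = 59.282 for the train averages; 0.3 x 42.872 + 0.7 x 38.738 = 39.978 for the valid set), i.e. a hybrid CTC/attention objective with CTC weight 0.3. The weight is inferred from the logged numbers, not read from the config file, so treat it as an assumption. A one-function sketch:

```python
def combined_loss(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    """Hybrid CTC/attention objective: w * ctc + (1 - w) * att (weight assumed)."""
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# Reproduce the epoch-51 summary values from the log.
assert abs(combined_loss(70.867, 54.317) - 59.282) < 1e-3   # [train]
assert abs(combined_loss(42.872, 38.738) - 39.978) < 1e-3   # [valid]
```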
+[gpub001:0/64] 2023-07-15 04:28:51,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 04:28:54,534 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:35:34,681 (trainer:732) INFO: 52epoch:train:1-100batch: iter_time=2.800, forward_time=0.165, loss_ctc=69.950, loss_att=52.193, acc=0.709, loss=57.520, backward_time=1.039, grad_norm=115.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.000e-05, train_time=8.425 +[gpub001:0/64] 2023-07-15 04:37:51,420 (trainer:732) INFO: 52epoch:train:101-200batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=69.058, loss_att=52.313, acc=0.715, loss=57.337, backward_time=1.032, grad_norm=143.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.999e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 04:40:09,108 (trainer:732) INFO: 52epoch:train:201-300batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=69.512, loss_att=51.083, acc=0.718, loss=56.611, backward_time=1.032, grad_norm=136.816, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.999e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 04:42:29,211 (trainer:732) INFO: 52epoch:train:301-400batch: iter_time=1.408e-04, forward_time=0.146, loss_ctc=77.821, loss_att=61.168, acc=0.703, loss=66.164, backward_time=1.035, grad_norm=151.115, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.802 +[gpub001:0/64] 2023-07-15 04:44:49,281 (trainer:732) INFO: 52epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=59.969, loss_att=45.427, acc=0.721, loss=49.790, backward_time=1.032, grad_norm=126.820, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.801 +[gpub001:0/64] 2023-07-15 04:47:08,640 (trainer:732) INFO: 52epoch:train:501-600batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=61.323, loss_att=48.328, acc=0.726, loss=52.227, backward_time=1.037, grad_norm=125.310, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 04:49:31,134 (trainer:732) INFO: 52epoch:train:601-700batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=75.112, loss_att=63.431, acc=0.711, loss=66.935, backward_time=1.054, grad_norm=142.319, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.850 +[gpub001:0/64] 2023-07-15 04:51:49,592 (trainer:732) INFO: 52epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=75.882, loss_att=61.266, acc=0.716, loss=65.651, backward_time=1.033, grad_norm=112.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, 
train_time=2.769 +[gpub001:0/64] 2023-07-15 04:52:41,328 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub001:0/64] 2023-07-15 04:52:59,420 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 04:53:02,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:57:23,419 (trainer:732) INFO: 52epoch:train:801-900batch: iter_time=1.338, forward_time=0.237, loss_ctc=68.461, loss_att=51.350, acc=0.709, loss=56.483, backward_time=1.050, grad_norm=146.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.996e-05, train_time=6.676 +[gpub001:0/64] 2023-07-15 04:59:41,599 (trainer:732) INFO: 52epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=69.156, loss_att=58.040, acc=0.702, loss=61.375, backward_time=1.029, grad_norm=139.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, train_time=2.764 +[gpub001:0/64] 2023-07-15 05:01:57,285 (trainer:732) INFO: 52epoch:train:1001-1100batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=71.143, loss_att=50.487, acc=0.721, loss=56.684, backward_time=1.029, grad_norm=117.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 05:04:13,647 (trainer:732) INFO: 52epoch:train:1101-1200batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.174, loss_att=57.692, acc=0.712, loss=62.637, backward_time=1.030, grad_norm=138.130, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:06:29,733 (trainer:732) INFO: 52epoch:train:1201-1300batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=57.362, loss_att=42.969, acc=0.723, loss=47.287, backward_time=1.029, grad_norm=118.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 05:08:45,836 (trainer:732) INFO: 52epoch:train:1301-1400batch: iter_time=1.205e-04, forward_time=0.149, loss_ctc=65.213, loss_att=52.131, acc=0.732, loss=56.055, backward_time=1.030, grad_norm=140.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 05:11:02,183 (trainer:732) INFO: 52epoch:train:1401-1500batch: iter_time=1.198e-04, forward_time=0.149, loss_ctc=76.107, loss_att=62.021, acc=0.715, loss=66.247, backward_time=1.032, grad_norm=127.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:13:18,216 (trainer:732) INFO: 52epoch:train:1501-1600batch: iter_time=1.083e-04, forward_time=0.148, loss_ctc=66.997, loss_att=54.395, acc=0.727, loss=58.175, 
backward_time=1.030, grad_norm=117.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 05:14:59,026 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-15 05:15:17,049 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 05:15:20,578 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:22:59,109 (trainer:732) INFO: 52epoch:train:1601-1700batch: iter_time=4.399, forward_time=0.169, loss_ctc=76.666, loss_att=57.348, acc=0.713, loss=63.143, backward_time=1.040, grad_norm=154.498, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.992e-05, train_time=11.617 +[gpub001:0/64] 2023-07-15 05:25:16,028 (trainer:732) INFO: 52epoch:train:1701-1800batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=68.875, loss_att=53.803, acc=0.711, loss=58.325, backward_time=1.031, grad_norm=128.396, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.992e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 05:27:31,940 (trainer:732) INFO: 52epoch:train:1801-1900batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=72.358, loss_att=52.034, acc=0.719, loss=58.132, backward_time=1.028, grad_norm=128.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:29:52,478 (trainer:732) INFO: 52epoch:train:1901-2000batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=74.207, loss_att=56.605, acc=0.713, loss=61.886, backward_time=1.047, grad_norm=146.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.811 +[gpub001:0/64] 2023-07-15 05:32:13,571 (trainer:732) INFO: 52epoch:train:2001-2100batch: iter_time=1.101e-04, forward_time=0.146, loss_ctc=60.906, loss_att=47.967, acc=0.726, loss=51.849, backward_time=1.035, grad_norm=150.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 05:34:34,015 (trainer:732) INFO: 52epoch:train:2101-2200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=59.010, loss_att=46.764, acc=0.727, loss=50.438, backward_time=1.032, grad_norm=205.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 05:37:07,034 (trainer:732) INFO: 52epoch:train:2201-2300batch: iter_time=0.005, forward_time=0.207, loss_ctc=72.052, loss_att=58.747, acc=0.714, loss=62.738, backward_time=1.065, grad_norm=153.377, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.212, optim0_lr0=4.989e-05, train_time=3.058 +[gpub001:0/64] 2023-07-15 05:39:24,955 (trainer:732) INFO: 
52epoch:train:2301-2400batch: iter_time=1.118e-04, forward_time=0.148, loss_ctc=77.542, loss_att=63.924, acc=0.722, loss=68.010, backward_time=1.031, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.989e-05, train_time=2.760 +[gpub001:0/64] 2023-07-15 05:41:45,240 (trainer:732) INFO: 52epoch:train:2401-2500batch: iter_time=1.128e-04, forward_time=0.144, loss_ctc=69.658, loss_att=49.632, acc=0.723, loss=55.639, backward_time=1.028, grad_norm=121.430, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.988e-05, train_time=2.805 +[gpub001:0/64] 2023-07-15 05:41:58,122 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 05:42:16,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 05:42:19,652 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:47:06,685 (trainer:732) INFO: 52epoch:train:2501-2600batch: iter_time=1.721, forward_time=0.145, loss_ctc=68.445, loss_att=51.045, acc=0.711, loss=56.265, backward_time=1.045, grad_norm=122.585, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.988e-05, train_time=6.429 +[gpub001:0/64] 2023-07-15 05:49:23,184 (trainer:732) INFO: 52epoch:train:2601-2700batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=68.564, loss_att=53.136, acc=0.713, loss=57.764, backward_time=1.031, grad_norm=117.186, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.987e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 05:51:38,947 (trainer:732) INFO: 52epoch:train:2701-2800batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=68.961, loss_att=48.677, acc=0.721, loss=54.762, backward_time=1.027, grad_norm=134.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.987e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 05:53:54,877 (trainer:732) INFO: 52epoch:train:2801-2900batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=77.652, loss_att=61.174, acc=0.707, loss=66.117, backward_time=1.028, grad_norm=169.753, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:56:10,325 (trainer:732) INFO: 52epoch:train:2901-3000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=59.198, loss_att=43.540, acc=0.729, loss=48.237, backward_time=1.026, grad_norm=130.238, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 05:58:26,241 (trainer:732) INFO: 52epoch:train:3001-3100batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=61.246, loss_att=47.337, acc=0.732, loss=51.510, backward_time=1.029, grad_norm=113.002, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:00:42,516 (trainer:732) INFO: 52epoch:train:3101-3200batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=73.840, loss_att=62.268, acc=0.715, loss=65.740, backward_time=1.030, grad_norm=132.126, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 06:02:01,422 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub001:0/64] 2023-07-15 06:02:58,632 (trainer:732) INFO: 52epoch:train:3201-3300batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=72.796, loss_att=59.618, acc=0.723, loss=63.572, backward_time=1.031, grad_norm=118.588, clip=100.000, loss_scale=5.100e+32, optim_step_time=0.182, optim0_lr0=4.984e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:03:46,609 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 06:04:04,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 06:04:08,408 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:08:26,592 (trainer:732) INFO: 52epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.146, loss_ctc=67.963, loss_att=50.761, acc=0.709, loss=55.921, backward_time=1.042, grad_norm=138.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.984e-05, train_time=6.559 +[gpub001:0/64] 2023-07-15 06:10:43,347 (trainer:732) INFO: 52epoch:train:3401-3500batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=68.987, loss_att=57.616, acc=0.701, loss=61.027, backward_time=1.030, grad_norm=121.698, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 06:12:59,043 (trainer:732) INFO: 52epoch:train:3501-3600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=70.640, loss_att=49.445, acc=0.720, loss=55.804, backward_time=1.027, grad_norm=163.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 06:15:17,290 (trainer:732) INFO: 52epoch:train:3601-3700batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=72.309, loss_att=57.367, acc=0.701, loss=61.849, backward_time=1.036, grad_norm=192.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.765 +[gpub001:0/64] 2023-07-15 06:17:32,908 (trainer:732) INFO: 52epoch:train:3701-3800batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=56.135, loss_att=43.432, acc=0.725, loss=47.242, backward_time=1.027, grad_norm=120.250, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.712 
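The `loss_scale` column in the surrounding windows is easiest to read as a per-window mean: the underlying scale moves in powers of two (3.245e+32 doubling to 6.490e+32, or halving to 1.623e+32 after an overflow), and in-between values such as 3.829e+32 or 5.100e+32 appear exactly when the change lands inside a 100-batch reporting window. Treating the logged value as that mean is an assumption about the reporter, but it is consistent with every value in this log, and it lets the switch point be recovered:

```python
# Assumption: the logged loss_scale is the mean over the 100-batch window.
# Solve for how many batches ran at the old scale before it changed.
def switch_batch(old: float, new: float, logged_mean: float, window: int = 100) -> float:
    return window * (new - logged_mean) / (new - old)

print(switch_batch(3.245e32, 6.490e32, 3.829e32))  # ~82: doubled late in window 2801-2900
print(switch_batch(6.490e32, 3.245e32, 5.100e32))  # ~57: halved mid-window after the nan skip
```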
+[gpub001:0/64] 2023-07-15 06:19:48,547 (trainer:732) INFO: 52epoch:train:3801-3900batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=64.984, loss_att=53.140, acc=0.717, loss=56.693, backward_time=1.026, grad_norm=124.613, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 06:22:04,604 (trainer:732) INFO: 52epoch:train:3901-4000batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=75.460, loss_att=63.239, acc=0.704, loss=66.905, backward_time=1.030, grad_norm=124.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 06:24:20,473 (trainer:732) INFO: 52epoch:train:4001-4100batch: iter_time=1.162e-04, forward_time=0.145, loss_ctc=68.466, loss_att=55.208, acc=0.726, loss=59.186, backward_time=1.028, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.980e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 06:25:55,117 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 06:26:13,418 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 06:26:16,923 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:30:23,969 (trainer:732) INFO: 52epoch:train:4101-4200batch: iter_time=1.316, forward_time=0.182, loss_ctc=70.188, loss_att=50.115, acc=0.713, loss=56.137, backward_time=1.038, grad_norm=155.015, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.980e-05, train_time=7.268 +[gpub001:0/64] 2023-07-15 06:32:52,217 (trainer:732) INFO: 52epoch:train:4201-4300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=69.316, loss_att=51.445, acc=0.717, loss=56.806, backward_time=1.043, grad_norm=137.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.966 +[gpub001:0/64] 2023-07-15 06:35:08,342 (trainer:732) INFO: 52epoch:train:4301-4400batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=72.977, loss_att=57.668, acc=0.705, loss=62.261, backward_time=1.029, grad_norm=141.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:37:26,814 (trainer:732) INFO: 52epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=63.643, loss_att=45.419, acc=0.719, loss=50.886, backward_time=1.056, grad_norm=113.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=2.769 +[gpub001:0/64] 2023-07-15 06:39:57,698 (trainer:732) INFO: 52epoch:train:4501-4600batch: iter_time=1.134e-04, forward_time=0.147, loss_ctc=75.027, loss_att=58.300, acc=0.701, loss=63.318, 
backward_time=1.043, grad_norm=180.474, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=3.017 +[gpub001:0/64] 2023-07-15 06:42:13,491 (trainer:732) INFO: 52epoch:train:4601-4700batch: iter_time=1.131e-04, forward_time=0.147, loss_ctc=56.007, loss_att=43.374, acc=0.727, loss=47.164, backward_time=1.027, grad_norm=140.554, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.977e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:44:29,393 (trainer:732) INFO: 52epoch:train:4701-4800batch: iter_time=1.151e-04, forward_time=0.147, loss_ctc=67.817, loss_att=52.699, acc=0.717, loss=57.235, backward_time=1.029, grad_norm=125.462, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.977e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:46:45,318 (trainer:732) INFO: 52epoch:train:4801-4900batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=73.382, loss_att=62.579, acc=0.709, loss=65.820, backward_time=1.029, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:49:01,133 (trainer:732) INFO: 52epoch:train:4901-5000batch: iter_time=1.282e-04, forward_time=0.148, loss_ctc=67.029, loss_att=51.868, acc=0.723, loss=56.416, backward_time=1.028, grad_norm=126.438, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:49:16,564 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub001:0/64] 2023-07-15 06:49:34,812 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-15 06:49:38,224 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:55:32,657 (trainer:732) INFO: 52epoch:train:5001-5100batch: iter_time=2.462, forward_time=0.167, loss_ctc=69.941, loss_att=52.394, acc=0.707, loss=57.658, backward_time=1.039, grad_norm=123.555, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=7.830 +[gpub001:0/64] 2023-07-15 06:57:48,838 (trainer:732) INFO: 52epoch:train:5101-5200batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=67.509, loss_att=53.022, acc=0.712, loss=57.368, backward_time=1.027, grad_norm=124.915, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.975e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:00:08,648 (trainer:732) INFO: 52epoch:train:5201-5300batch: iter_time=1.262e-04, forward_time=0.145, loss_ctc=66.544, loss_att=46.869, acc=0.728, loss=52.771, backward_time=1.037, grad_norm=123.961, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:02:28,465 (trainer:732) INFO: 
52epoch:train:5301-5400batch: iter_time=1.234e-04, forward_time=0.147, loss_ctc=76.176, loss_att=59.538, acc=0.700, loss=64.529, backward_time=1.041, grad_norm=207.376, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:04:57,605 (trainer:732) INFO: 52epoch:train:5401-5500batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=59.021, loss_att=44.631, acc=0.724, loss=48.948, backward_time=1.045, grad_norm=134.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.983 +[gpub001:0/64] 2023-07-15 07:07:19,312 (trainer:732) INFO: 52epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=60.113, loss_att=46.606, acc=0.727, loss=50.658, backward_time=1.044, grad_norm=107.221, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.834 +[gpub001:0/64] 2023-07-15 07:09:42,242 (trainer:732) INFO: 52epoch:train:5601-5700batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=74.213, loss_att=63.764, acc=0.702, loss=66.899, backward_time=1.047, grad_norm=124.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.858 +[gpub001:0/64] 2023-07-15 07:11:58,361 (trainer:732) INFO: 52epoch:train:5701-5800batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=72.924, loss_att=58.764, acc=0.720, loss=63.012, backward_time=1.029, grad_norm=109.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 07:12:46,518 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 07:13:04,771 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 07:13:08,297 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:17:17,109 (trainer:732) INFO: 52epoch:train:5801-5900batch: iter_time=1.320, forward_time=0.194, loss_ctc=67.235, loss_att=50.707, acc=0.707, loss=55.665, backward_time=1.041, grad_norm=126.723, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.971e-05, train_time=6.375 +[gpub001:0/64] 2023-07-15 07:19:34,139 (trainer:732) INFO: 52epoch:train:5901-6000batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=67.571, loss_att=55.834, acc=0.707, loss=59.355, backward_time=1.028, grad_norm=119.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.971e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 07:21:50,446 (trainer:732) INFO: 52epoch:train:6001-6100batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=69.750, loss_att=48.735, acc=0.721, loss=55.039, backward_time=1.028, grad_norm=137.401, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 07:24:05,901 (trainer:732) INFO: 52epoch:train:6101-6200batch: iter_time=1.260e-04, forward_time=0.143, loss_ctc=74.044, loss_att=56.325, acc=0.704, loss=61.641, backward_time=1.026, grad_norm=136.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 07:26:21,636 (trainer:732) INFO: 52epoch:train:6201-6300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=55.855, loss_att=43.352, acc=0.724, loss=47.103, backward_time=1.028, grad_norm=115.672, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 07:28:42,756 (trainer:732) INFO: 52epoch:train:6301-6400batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.969, loss_att=51.098, acc=0.723, loss=54.659, backward_time=1.046, grad_norm=130.164, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 07:31:00,939 (trainer:732) INFO: 52epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.147, loss_ctc=75.551, loss_att=63.770, acc=0.701, loss=67.304, backward_time=1.032, grad_norm=129.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.968e-05, train_time=2.763 +[gpub001:0/64] 2023-07-15 07:33:28,577 (trainer:732) INFO: 52epoch:train:6501-6600batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=68.149, loss_att=54.330, acc=0.725, loss=58.476, backward_time=1.036, grad_norm=113.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.968e-05, train_time=2.953 +[gpub001:0/64] 2023-07-15 07:35:03,598 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/64] 2023-07-15 07:35:21,711 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-15 07:35:25,101 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:39:15,653 (trainer:732) INFO: 52epoch:train:6601-6700batch: iter_time=1.307, forward_time=0.166, loss_ctc=70.728, loss_att=49.460, acc=0.717, loss=55.840, backward_time=1.039, grad_norm=123.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=6.941 +[gpub001:0/64] 2023-07-15 07:41:33,326 (trainer:732) INFO: 52epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.145, loss_ctc=68.471, loss_att=51.019, acc=0.714, loss=56.255, backward_time=1.034, grad_norm=118.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 07:43:49,511 (trainer:732) INFO: 52epoch:train:6801-6900batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=72.102, loss_att=56.769, acc=0.710, loss=61.369, backward_time=1.028, grad_norm=153.381, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:46:08,170 (trainer:732) INFO: 52epoch:train:6901-7000batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=62.933, loss_att=45.330, acc=0.724, loss=50.611, backward_time=1.039, grad_norm=115.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.773 +[gpub001:0/64] 2023-07-15 07:48:28,609 (trainer:732) INFO: 52epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=76.028, loss_att=57.030, acc=0.707, loss=62.729, backward_time=1.034, grad_norm=162.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 07:50:46,730 (trainer:732) INFO: 52epoch:train:7101-7200batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=56.737, loss_att=43.740, acc=0.729, loss=47.639, backward_time=1.031, grad_norm=117.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.762 +[gpub001:0/64] 2023-07-15 07:53:15,548 (trainer:732) INFO: 52epoch:train:7201-7300batch: iter_time=1.316e-04, forward_time=0.146, loss_ctc=67.374, loss_att=53.427, acc=0.716, loss=57.611, backward_time=1.037, grad_norm=133.942, clip=100.000, loss_scale=4.608e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.976 +[gpub001:0/64] 2023-07-15 07:55:26,256 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 07:55:39,865 (trainer:732) INFO: 52epoch:train:7301-7400batch: iter_time=1.232e-04, forward_time=0.148, loss_ctc=73.022, loss_att=61.882, acc=0.708, loss=65.224, backward_time=1.056, grad_norm=127.732, clip=100.000, loss_scale=6.159e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.886 +[gpub001:0/64] 2023-07-15 07:58:00,543 (trainer:732) INFO: 52epoch:train:7401-7500batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=65.887, loss_att=51.810, acc=0.721, loss=56.033, backward_time=1.031, grad_norm=112.643, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=2.813 +[gpub001:0/64] 2023-07-15 07:58:11,614 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 07:58:29,689 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 07:58:33,144 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:04:38,481 (trainer:732) INFO: 52epoch:train:7501-7600batch: iter_time=2.524, forward_time=0.173, loss_ctc=68.274, loss_att=52.729, acc=0.716, loss=57.393, backward_time=1.043, grad_norm=122.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=7.958 +[gpub001:0/64] 2023-07-15 08:06:55,378 (trainer:732) INFO: 52epoch:train:7601-7700batch: iter_time=1.197e-04, forward_time=0.150, loss_ctc=66.701, loss_att=51.740, acc=0.717, loss=56.228, backward_time=1.030, grad_norm=132.874, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.962e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 08:09:14,728 (trainer:732) INFO: 52epoch:train:7701-7800batch: iter_time=1.194e-04, forward_time=0.163, loss_ctc=66.481, loss_att=47.413, acc=0.730, loss=53.133, backward_time=1.030, grad_norm=123.853, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=4.962e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 08:11:09,763 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 08:11:31,528 (trainer:732) INFO: 52epoch:train:7801-7900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=76.969, loss_att=61.099, acc=0.712, loss=65.860, backward_time=1.030, grad_norm=147.788, clip=100.000, loss_scale=2.980e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:13:47,018 (trainer:732) INFO: 52epoch:train:7901-8000batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=58.050, loss_att=42.970, acc=0.735, loss=47.494, backward_time=1.025, grad_norm=116.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 08:16:02,954 (trainer:732) INFO: 52epoch:train:8001-8100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=60.651, loss_att=47.371, acc=0.734, loss=51.355, backward_time=1.027, grad_norm=123.406, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.960e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 08:18:19,062 (trainer:732) INFO: 52epoch:train:8101-8200batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=72.709, loss_att=62.133, acc=0.722, loss=65.306, backward_time=1.028, grad_norm=112.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 08:20:35,779 (trainer:732) INFO: 52epoch:train:8201-8300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=72.592, loss_att=59.622, acc=0.726, loss=63.513, backward_time=1.028, grad_norm=112.657, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 08:21:28,081 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 08:21:46,305 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub001:0/64] 2023-07-15 08:21:50,025 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:27:03,659 (trainer:732) INFO: 52epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.161, loss_ctc=63.582, loss_att=46.401, acc=0.711, loss=51.555, backward_time=1.043, grad_norm=110.495, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.959e-05, train_time=7.757 +[gpub001:0/64] 2023-07-15 08:29:20,503 (trainer:732) INFO: 52epoch:train:8401-8500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=71.706, loss_att=52.790, acc=0.718, loss=58.465, backward_time=1.029, grad_norm=132.484, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.959e-05, train_time=2.737 +[gpub001:0/64] 2023-07-15 08:31:36,508 (trainer:732) INFO: 52epoch:train:8501-8600batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=69.582, loss_att=53.043, acc=0.721, loss=58.005, 
backward_time=1.030, grad_norm=129.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:33:53,300 (trainer:732) INFO: 52epoch:train:8601-8700batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=63.441, loss_att=46.306, acc=0.719, loss=51.447, backward_time=1.030, grad_norm=134.435, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:36:12,547 (trainer:732) INFO: 52epoch:train:8701-8800batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=72.367, loss_att=57.240, acc=0.706, loss=61.778, backward_time=1.040, grad_norm=144.562, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 08:38:28,158 (trainer:732) INFO: 52epoch:train:8801-8900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=54.536, loss_att=41.153, acc=0.739, loss=45.168, backward_time=1.028, grad_norm=114.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 08:40:58,526 (trainer:732) INFO: 52epoch:train:8901-9000batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=69.535, loss_att=56.601, acc=0.709, loss=60.481, backward_time=1.044, grad_norm=139.440, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=3.007 +[gpub001:0/64] 2023-07-15 08:43:14,542 (trainer:732) INFO: 52epoch:train:9001-9100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.360, loss_att=63.389, acc=0.715, loss=67.281, backward_time=1.029, grad_norm=114.369, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:44:48,439 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub001:0/64] 2023-07-15 08:45:06,514 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 08:45:09,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 08:50:38,705 (trainer:732) INFO: 52epoch:train:9101-9200batch: iter_time=1.386, forward_time=0.204, loss_ctc=64.693, loss_att=46.910, acc=0.724, loss=52.245, backward_time=1.042, grad_norm=132.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.955e-05, train_time=8.882
+[gpub001:0/64] 2023-07-15 08:52:56,341 (trainer:732) INFO: 52epoch:train:9201-9300batch: iter_time=1.218e-04, forward_time=0.149, loss_ctc=67.583, loss_att=50.815, acc=0.725, loss=55.846, backward_time=1.032, grad_norm=120.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.955e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 08:55:13,695 (trainer:732) INFO: 52epoch:train:9301-9400batch: iter_time=1.130e-04, forward_time=0.149, loss_ctc=72.507, loss_att=57.099, acc=0.717, loss=61.721, backward_time=1.030, grad_norm=148.100, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 08:57:29,816 (trainer:732) INFO: 52epoch:train:9401-9500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=61.723, loss_att=44.944, acc=0.730, loss=49.978, backward_time=1.028, grad_norm=107.006, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 08:59:46,404 (trainer:732) INFO: 52epoch:train:9501-9600batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=73.882, loss_att=57.515, acc=0.715, loss=62.425, backward_time=1.031, grad_norm=134.396, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 09:02:02,343 (trainer:732) INFO: 52epoch:train:9601-9700batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=57.834, loss_att=43.892, acc=0.733, loss=48.075, backward_time=1.027, grad_norm=105.315, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 09:04:18,628 (trainer:732) INFO: 52epoch:train:9701-9800batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=69.046, loss_att=55.521, acc=0.724, loss=59.578, backward_time=1.028, grad_norm=134.539, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 09:06:40,864 (trainer:732) INFO: 52epoch:train:9801-9900batch: iter_time=1.086e-04, forward_time=0.146, loss_ctc=71.846, loss_att=59.131, acc=0.721, loss=62.945, backward_time=1.034, grad_norm=118.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.844
+[gpub001:0/64] 2023-07-15 09:08:57,505 (trainer:732) INFO: 52epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.148, loss_ctc=67.461, loss_att=52.187, acc=0.727, loss=56.769, backward_time=1.031, grad_norm=132.078, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.951e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 09:23:19,047 (trainer:338) INFO: 52epoch results: [train] iter_time=0.232, forward_time=0.151, loss_ctc=68.405, loss_att=53.118, acc=0.717, loss=57.704, backward_time=1.034, grad_norm=132.000, clip=100.000, loss_scale=3.065e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=3.365, time=4 hours, 40 minutes and 41.9 seconds, total_count=490000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.162, cer_ctc=0.252, loss_att=38.727, acc=0.677, cer=0.407, wer=0.996, loss=40.058, time=8 minutes and 9.5 seconds, total_count=50094, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 54.24 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 09:23:36,296 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/47epoch.pth
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:272) INFO: 53/60epoch started. Estimated time to finish: 1 day, 15 hours and 44 minutes
+[gpub001:0/64] 2023-07-15 09:23:37,760 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-15 09:23:55,704 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 09:23:59,041 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:31:30,451 (trainer:732) INFO: 53epoch:train:1-100batch: iter_time=3.311, forward_time=0.176, loss_ctc=77.249, loss_att=58.785, acc=0.708, loss=64.324, backward_time=1.043, grad_norm=155.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=4.951e-05, train_time=9.474
+[gpub001:0/64] 2023-07-15 09:33:55,969 (trainer:732) INFO: 53epoch:train:101-200batch: iter_time=1.288e-04, forward_time=0.189, loss_ctc=71.700, loss_att=52.162, acc=0.707, loss=58.023, backward_time=1.035, grad_norm=122.394, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.950e-05, train_time=2.911
+[gpub001:0/64] 2023-07-15 09:36:26,554 (trainer:732) INFO: 53epoch:train:201-300batch: iter_time=0.001, forward_time=0.236, loss_ctc=66.496, loss_att=46.521, acc=0.739, loss=52.513, backward_time=1.048, grad_norm=117.834, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.950e-05, train_time=3.011
+[gpub001:0/64] 2023-07-15 09:39:01,453 (trainer:732) INFO: 53epoch:train:301-400batch: iter_time=9.101e-04, forward_time=0.287, loss_ctc=75.211, loss_att=58.135, acc=0.711, loss=63.258, backward_time=1.053, grad_norm=108.586, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.190, optim0_lr0=4.949e-05, train_time=3.098
+[gpub001:0/64] 2023-07-15 09:41:30,212 (trainer:732) INFO: 53epoch:train:401-500batch: iter_time=3.592e-04, forward_time=0.238, loss_ctc=68.220, loss_att=49.582, acc=0.727, loss=55.173, backward_time=1.045, grad_norm=122.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.949e-05, train_time=2.975
+[gpub001:0/64] 2023-07-15 09:43:59,845 (trainer:732) INFO: 53epoch:train:501-600batch: iter_time=0.003, forward_time=0.238, loss_ctc=65.657, loss_att=47.631, acc=0.729, loss=53.039, backward_time=1.049, grad_norm=152.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:46:31,374 (trainer:732) INFO: 53epoch:train:601-700batch: iter_time=1.153e-04, forward_time=0.177, loss_ctc=72.494, loss_att=54.683, acc=0.710, loss=60.026, backward_time=1.055, grad_norm=148.168, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=3.031
+[gpub001:0/64] 2023-07-15 09:49:00,984 (trainer:732) INFO: 53epoch:train:701-800batch: iter_time=6.123e-04, forward_time=0.231, loss_ctc=65.196, loss_att=47.527, acc=0.726, loss=52.828, backward_time=1.042, grad_norm=130.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.947e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:49:58,288 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 09:50:16,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 09:50:19,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:58:24,759 (trainer:732) INFO: 53epoch:train:801-900batch: iter_time=4.199, forward_time=0.197, loss_ctc=70.653, loss_att=50.295, acc=0.719, loss=56.402, backward_time=1.041, grad_norm=118.018, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.947e-05, train_time=11.275
+[gpub001:0/64] 2023-07-15 10:00:41,776 (trainer:732) INFO: 53epoch:train:901-1000batch: iter_time=1.306e-04, forward_time=0.151, loss_ctc=77.224, loss_att=55.334, acc=0.705, loss=61.901, backward_time=1.032, grad_norm=132.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 10:02:57,402 (trainer:732) INFO: 53epoch:train:1001-1100batch: iter_time=1.317e-04, forward_time=0.148, loss_ctc=64.572, loss_att=46.014, acc=0.728, loss=51.581, backward_time=1.027, grad_norm=123.422, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 10:05:13,259 (trainer:732) INFO: 53epoch:train:1101-1200batch: iter_time=1.391e-04, forward_time=0.148, loss_ctc=73.301, loss_att=55.843, acc=0.715, loss=61.080, backward_time=1.029, grad_norm=135.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:07:28,967 (trainer:732) INFO: 53epoch:train:1201-1300batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=63.739, loss_att=47.917, acc=0.729, loss=52.664, backward_time=1.028, grad_norm=114.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 10:09:44,862 (trainer:732) INFO: 53epoch:train:1301-1400batch: iter_time=1.252e-04, forward_time=0.149, loss_ctc=67.179, loss_att=47.097, acc=0.730, loss=53.122, backward_time=1.028, grad_norm=116.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 10:12:00,515 (trainer:732) INFO: 53epoch:train:1401-1500batch: iter_time=1.255e-04, forward_time=0.148, loss_ctc=68.130, loss_att=49.836, acc=0.714, loss=55.324, backward_time=1.027, grad_norm=129.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 10:14:17,608 (trainer:732) INFO: 53epoch:train:1501-1600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=67.907, loss_att=53.286, acc=0.708, loss=57.672, backward_time=1.028, grad_norm=135.533, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 10:15:56,480 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
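Note: in each per-100-batch entry, the reported loss is consistent with the standard hybrid CTC/attention objective with a CTC weight of 0.3, i.e. loss = 0.3 * loss_ctc + 0.7 * loss_att (the weight is inferred from the logged numbers here, not read from the config). A quick check against two entries logged above:

    # Verify loss = 0.3 * loss_ctc + 0.7 * loss_att for logged entries
    # (0.3 is an assumed CTC weight that reproduces the logged totals).
    entries = [
        (63.441, 46.306, 51.447),  # 52epoch:train:8601-8700batch
        (72.367, 57.240, 61.778),  # 52epoch:train:8701-8800batch
    ]
    for loss_ctc, loss_att, logged in entries:
        assert abs(0.3 * loss_ctc + 0.7 * loss_att - logged) < 5e-3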
+[gpub001:0/64] 2023-07-15 10:16:14,877 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 10:16:18,387 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:20:43,071 (trainer:732) INFO: 53epoch:train:1601-1700batch: iter_time=1.340, forward_time=0.148, loss_ctc=70.666, loss_att=56.816, acc=0.715, loss=60.971, backward_time=1.036, grad_norm=162.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=7.709
+[gpub001:0/64] 2023-07-15 10:22:59,760 (trainer:732) INFO: 53epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=71.652, loss_att=52.533, acc=0.712, loss=58.269, backward_time=1.033, grad_norm=141.266, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 10:25:15,620 (trainer:732) INFO: 53epoch:train:1801-1900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=76.945, loss_att=54.785, acc=0.701, loss=61.433, backward_time=1.028, grad_norm=120.255, clip=100.000, loss_scale=1.882e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:27:32,921 (trainer:732) INFO: 53epoch:train:1901-2000batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=65.557, loss_att=44.420, acc=0.733, loss=50.761, backward_time=1.026, grad_norm=170.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:29:54,896 (trainer:732) INFO: 53epoch:train:2001-2100batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=78.932, loss_att=60.005, acc=0.708, loss=65.683, backward_time=1.032, grad_norm=154.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 10:32:19,721 (trainer:732) INFO: 53epoch:train:2101-2200batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=65.440, loss_att=48.548, acc=0.725, loss=53.615, backward_time=1.039, grad_norm=135.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.896
+[gpub001:0/64] 2023-07-15 10:34:37,291 (trainer:732) INFO: 53epoch:train:2201-2300batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=63.624, loss_att=44.869, acc=0.738, loss=50.495, backward_time=1.031, grad_norm=144.749, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 10:36:56,681 (trainer:732) INFO: 53epoch:train:2301-2400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.643, loss_att=51.467, acc=0.712, loss=57.520, backward_time=1.028, grad_norm=180.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.788
+[gpub001:0/64] 2023-07-15 10:39:14,003 (trainer:732) INFO: 53epoch:train:2401-2500batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=64.355, loss_att=47.519, acc=0.719, loss=52.570, backward_time=1.029, grad_norm=114.242, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.939e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:39:17,582 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 10:39:35,923 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 10:39:39,461 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:44:46,978 (trainer:732) INFO: 53epoch:train:2501-2600batch: iter_time=1.885, forward_time=0.176, loss_ctc=73.411, loss_att=53.483, acc=0.721, loss=59.461, backward_time=1.044, grad_norm=123.712, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.939e-05, train_time=6.659
+[gpub001:0/64] 2023-07-15 10:47:15,365 (trainer:732) INFO: 53epoch:train:2601-2700batch: iter_time=1.000e-04, forward_time=0.145, loss_ctc=75.773, loss_att=56.053, acc=0.699, loss=61.969, backward_time=1.046, grad_norm=116.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.968
+[gpub001:0/64] 2023-07-15 10:49:45,331 (trainer:732) INFO: 53epoch:train:2701-2800batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=65.403, loss_att=45.525, acc=0.728, loss=51.488, backward_time=1.046, grad_norm=116.454, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.999
+[gpub001:0/64] 2023-07-15 10:52:05,173 (trainer:732) INFO: 53epoch:train:2801-2900batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=73.294, loss_att=53.142, acc=0.721, loss=59.188, backward_time=1.038, grad_norm=148.478, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 10:54:39,743 (trainer:732) INFO: 53epoch:train:2901-3000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=67.600, loss_att=51.971, acc=0.725, loss=56.660, backward_time=1.042, grad_norm=138.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=3.091
+[gpub001:0/64] 2023-07-15 10:56:58,255 (trainer:732) INFO: 53epoch:train:3001-3100batch: iter_time=1.071e-04, forward_time=0.144, loss_ctc=65.833, loss_att=46.073, acc=0.732, loss=52.001, backward_time=1.031, grad_norm=112.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.770
+[gpub001:0/64] 2023-07-15 10:59:19,483 (trainer:732) INFO: 53epoch:train:3101-3200batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=66.802, loss_att=49.191, acc=0.717, loss=54.475, backward_time=1.039, grad_norm=159.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.824
+[gpub001:0/64] 2023-07-15 11:00:27,528 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 11:01:42,885 (trainer:732) INFO: 53epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.145, loss_ctc=69.338, loss_att=52.658, acc=0.720, loss=57.662, backward_time=1.040, grad_norm=112.301, clip=100.000, loss_scale=2.351e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=2.868
+[gpub001:0/64] 2023-07-15 11:02:40,035 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 11:02:58,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 11:03:01,537 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:07:57,739 (trainer:732) INFO: 53epoch:train:3301-3400batch: iter_time=2.006, forward_time=0.145, loss_ctc=69.350, loss_att=48.327, acc=0.728, loss=54.634, backward_time=1.039, grad_norm=118.392, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=7.497
+[gpub001:0/64] 2023-07-15 11:10:14,439 (trainer:732) INFO: 53epoch:train:3401-3500batch: iter_time=1.282e-04, forward_time=0.147, loss_ctc=77.528, loss_att=60.141, acc=0.707, loss=65.357, backward_time=1.032, grad_norm=137.504, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 11:12:30,025 (trainer:732) INFO: 53epoch:train:3501-3600batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=67.358, loss_att=47.997, acc=0.719, loss=53.805, backward_time=1.026, grad_norm=117.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:14:48,416 (trainer:732) INFO: 53epoch:train:3601-3700batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=73.623, loss_att=53.854, acc=0.738, loss=59.785, backward_time=1.029, grad_norm=143.444, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:17:21,226 (trainer:732) INFO: 53epoch:train:3701-3800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=70.255, loss_att=50.718, acc=0.718, loss=56.579, backward_time=1.047, grad_norm=167.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=3.056
+[gpub001:0/64] 2023-07-15 11:19:38,219 (trainer:732) INFO: 53epoch:train:3801-3900batch: iter_time=1.312e-04, forward_time=0.148, loss_ctc=65.971, loss_att=48.571, acc=0.738, loss=53.791, backward_time=1.029, grad_norm=126.572, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.932e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 11:21:58,735 (trainer:732) INFO: 53epoch:train:3901-4000batch: iter_time=1.312e-04, forward_time=0.147, loss_ctc=63.471, loss_att=47.210, acc=0.729, loss=52.089, backward_time=1.030, grad_norm=141.710, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.932e-05, train_time=2.810
+[gpub001:0/64] 2023-07-15 11:24:16,739 (trainer:732) INFO: 53epoch:train:4001-4100batch: iter_time=1.362e-04, forward_time=0.148, loss_ctc=71.783, loss_att=52.931, acc=0.721, loss=58.586, backward_time=1.028, grad_norm=117.704, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.760
+[gpub001:0/64] 2023-07-15 11:25:58,082 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 11:26:16,209 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 11:26:19,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:31:12,862 (trainer:732) INFO: 53epoch:train:4101-4200batch: iter_time=1.396, forward_time=0.174, loss_ctc=67.126, loss_att=50.278, acc=0.730, loss=55.332, backward_time=1.038, grad_norm=122.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.931e-05, train_time=8.322
+[gpub001:0/64] 2023-07-15 11:33:32,008 (trainer:732) INFO: 53epoch:train:4201-4300batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=72.073, loss_att=52.426, acc=0.717, loss=58.320, backward_time=1.031, grad_norm=122.383, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.783
+[gpub001:0/64] 2023-07-15 11:35:47,563 (trainer:732) INFO: 53epoch:train:4301-4400batch: iter_time=9.438e-05, forward_time=0.144, loss_ctc=75.067, loss_att=52.917, acc=0.710, loss=59.562, backward_time=1.028, grad_norm=140.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:38:03,264 (trainer:732) INFO: 53epoch:train:4401-4500batch: iter_time=1.051e-04, forward_time=0.146, loss_ctc=63.278, loss_att=44.313, acc=0.735, loss=50.003, backward_time=1.027, grad_norm=155.846, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 11:40:20,170 (trainer:732) INFO: 53epoch:train:4501-4600batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=75.680, loss_att=58.305, acc=0.715, loss=63.517, backward_time=1.032, grad_norm=150.291, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 11:42:36,442 (trainer:732) INFO: 53epoch:train:4601-4700batch: iter_time=1.032e-04, forward_time=0.144, loss_ctc=65.197, loss_att=48.186, acc=0.727, loss=53.289, backward_time=1.027, grad_norm=138.175, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 11:44:54,859 (trainer:732) INFO: 53epoch:train:4701-4800batch: iter_time=9.797e-05, forward_time=0.145, loss_ctc=64.764, loss_att=45.735, acc=0.737, loss=51.444, backward_time=1.029, grad_norm=220.036, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:47:16,838 (trainer:732) INFO: 53epoch:train:4801-4900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.186, loss_att=49.914, acc=0.714, loss=55.995, backward_time=1.034, grad_norm=126.685, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 11:49:35,518 (trainer:732) INFO: 53epoch:train:4901-5000batch: iter_time=1.101e-04, forward_time=0.144, loss_ctc=63.751, loss_att=47.279, acc=0.720, loss=52.221, backward_time=1.036, grad_norm=128.634, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=2.773
+[gpub001:0/64] 2023-07-15 11:49:40,092 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 11:49:58,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 11:50:01,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:56:06,405 (trainer:732) INFO: 53epoch:train:5001-5100batch: iter_time=1.688, forward_time=0.158, loss_ctc=76.559, loss_att=57.521, acc=0.703, loss=63.233, backward_time=1.040, grad_norm=125.506, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=7.818
+[gpub001:0/64] 2023-07-15 11:58:22,014 (trainer:732) INFO: 53epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.719, loss_att=48.716, acc=0.715, loss=55.017, backward_time=1.026, grad_norm=133.259, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:00:37,642 (trainer:732) INFO: 53epoch:train:5201-5300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=66.930, loss_att=46.500, acc=0.734, loss=52.629, backward_time=1.027, grad_norm=125.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:02:53,478 (trainer:732) INFO: 53epoch:train:5301-5400batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=73.301, loss_att=55.694, acc=0.715, loss=60.976, backward_time=1.028, grad_norm=137.805, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 12:05:09,897 (trainer:732) INFO: 53epoch:train:5401-5500batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=67.056, loss_att=49.311, acc=0.723, loss=54.635, backward_time=1.028, grad_norm=120.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.728
+[gpub001:0/64] 2023-07-15 12:07:26,024 (trainer:732) INFO: 53epoch:train:5501-5600batch: iter_time=1.192e-04, forward_time=0.147, loss_ctc=63.690, loss_att=46.252, acc=0.732, loss=51.484, backward_time=1.029, grad_norm=139.681, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 12:09:41,948 (trainer:732) INFO: 53epoch:train:5601-5700batch: iter_time=1.552e-04, forward_time=0.147, loss_ctc=70.104, loss_att=52.001, acc=0.712, loss=57.432, backward_time=1.028, grad_norm=107.690, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:11:58,251 (trainer:732) INFO: 53epoch:train:5701-5800batch: iter_time=1.338e-04, forward_time=0.148, loss_ctc=63.833, loss_att=47.985, acc=0.723, loss=52.739, backward_time=1.029, grad_norm=129.181, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:12:53,156 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 12:13:10,963 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 12:13:14,439 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:18:21,859 (trainer:732) INFO: 53epoch:train:5801-5900batch: iter_time=1.831, forward_time=0.146, loss_ctc=71.893, loss_att=49.578, acc=0.732, loss=56.273, backward_time=1.038, grad_norm=140.020, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=7.672
+[gpub001:0/64] 2023-07-15 12:20:39,651 (trainer:732) INFO: 53epoch:train:5901-6000batch: iter_time=1.334e-04, forward_time=0.153, loss_ctc=77.718, loss_att=60.620, acc=0.708, loss=65.749, backward_time=1.031, grad_norm=115.613, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.756
+[gpub001:0/64] 2023-07-15 12:22:55,460 (trainer:732) INFO: 53epoch:train:6001-6100batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=66.395, loss_att=47.215, acc=0.723, loss=52.969, backward_time=1.028, grad_norm=119.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:25:14,502 (trainer:732) INFO: 53epoch:train:6101-6200batch: iter_time=0.003, forward_time=0.146, loss_ctc=73.384, loss_att=54.398, acc=0.735, loss=60.094, backward_time=1.038, grad_norm=134.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.781
+[gpub001:0/64] 2023-07-15 12:27:31,260 (trainer:732) INFO: 53epoch:train:6201-6300batch: iter_time=1.497e-04, forward_time=0.148, loss_ctc=68.480, loss_att=49.380, acc=0.721, loss=55.110, backward_time=1.029, grad_norm=133.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 12:29:47,555 (trainer:732) INFO: 53epoch:train:6301-6400batch: iter_time=9.751e-05, forward_time=0.147, loss_ctc=64.980, loss_att=48.650, acc=0.736, loss=53.549, backward_time=1.029, grad_norm=109.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.920e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:32:03,537 (trainer:732) INFO: 53epoch:train:6401-6500batch: iter_time=9.952e-05, forward_time=0.147, loss_ctc=64.249, loss_att=47.186, acc=0.732, loss=52.305, backward_time=1.029, grad_norm=131.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.920e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 12:34:25,293 (trainer:732) INFO: 53epoch:train:6501-6600batch: iter_time=6.416e-04, forward_time=0.158, loss_ctc=70.757, loss_att=53.176, acc=0.719, loss=58.450, backward_time=1.030, grad_norm=142.896, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.920e-05, train_time=2.835
+[gpub001:0/64] 2023-07-15 12:36:07,690 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 12:36:25,678 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 12:36:29,164 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:42:08,942 (trainer:732) INFO: 53epoch:train:6601-6700batch: iter_time=1.626, forward_time=0.195, loss_ctc=66.551, loss_att=48.083, acc=0.738, loss=53.624, backward_time=1.040, grad_norm=118.665, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.919e-05, train_time=9.271
+[gpub001:0/64] 2023-07-15 12:44:26,138 (trainer:732) INFO: 53epoch:train:6701-6800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=72.189, loss_att=55.718, acc=0.714, loss=60.660, backward_time=1.030, grad_norm=123.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.919e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 12:46:42,970 (trainer:732) INFO: 53epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=74.928, loss_att=53.819, acc=0.718, loss=60.152, backward_time=1.032, grad_norm=125.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 12:48:58,893 (trainer:732) INFO: 53epoch:train:6901-7000batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=63.121, loss_att=45.095, acc=0.738, loss=50.503, backward_time=1.027, grad_norm=109.171, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:51:14,725 (trainer:732) INFO: 53epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=77.407, loss_att=58.128, acc=0.716, loss=63.911, backward_time=1.029, grad_norm=119.319, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:53:30,552 (trainer:732) INFO: 53epoch:train:7101-7200batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=64.425, loss_att=47.865, acc=0.737, loss=52.833, backward_time=1.028, grad_norm=136.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:55:46,313 (trainer:732) INFO: 53epoch:train:7201-7300batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=64.912, loss_att=45.342, acc=0.743, loss=51.213, backward_time=1.029, grad_norm=110.998, clip=100.000, loss_scale=2.499e+32, optim_step_time=0.182, optim0_lr0=4.916e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 12:58:02,139 (trainer:732) INFO: 53epoch:train:7301-7400batch: iter_time=1.144e-04, forward_time=0.147, loss_ctc=70.223, loss_att=50.001, acc=0.723, loss=56.068, backward_time=1.028, grad_norm=128.640, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.916e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 13:00:18,214 (trainer:732) INFO: 53epoch:train:7401-7500batch: iter_time=1.159e-04, forward_time=0.148, loss_ctc=62.811, loss_att=47.105, acc=0.732, loss=51.817, backward_time=1.030, grad_norm=123.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.915e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 13:00:22,641 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
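Note: every iter-factory build logs the same batch-sampler summary (N-batch=37994, batch_size=128, mean=128.0, min=128, max=129). That shape is what fixed-size batching produces when keys are taken in key-file order and the leftover keys, instead of forming a short final batch, are folded into existing batches. A sketch under that assumption (illustrative, not the UnsortedBatchSampler source):

    # Cut keys into fixed-size batches; spread the remainder over existing
    # batches, so a few batches hold 129 keys (min=128, max=129, mean~128.0).
    def make_batches(keys, batch_size=128):
        n_batch = len(keys) // batch_size
        batches = [keys[i * batch_size:(i + 1) * batch_size] for i in range(n_batch)]
        for j, key in enumerate(keys[n_batch * batch_size:]):
            batches[j % n_batch].append(key)
        return batches

    sizes = [len(b) for b in make_batches(list(range(37994 * 128 + 50)))]
    assert (len(sizes), min(sizes), max(sizes)) == (37994, 128, 129)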
+[gpub001:0/64] 2023-07-15 13:00:40,613 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 13:00:44,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:06:53,659 (trainer:732) INFO: 53epoch:train:7501-7600batch: iter_time=1.682, forward_time=0.153, loss_ctc=71.689, loss_att=50.993, acc=0.731, loss=57.202, backward_time=1.056, grad_norm=110.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.915e-05, train_time=7.909
+[gpub001:0/64] 2023-07-15 13:09:10,347 (trainer:732) INFO: 53epoch:train:7601-7700batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=75.829, loss_att=57.328, acc=0.712, loss=62.878, backward_time=1.028, grad_norm=137.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 13:11:26,829 (trainer:732) INFO: 53epoch:train:7701-7800batch: iter_time=1.304e-04, forward_time=0.148, loss_ctc=65.217, loss_att=45.189, acc=0.736, loss=51.198, backward_time=1.029, grad_norm=122.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 13:13:42,554 (trainer:732) INFO: 53epoch:train:7801-7900batch: iter_time=1.287e-04, forward_time=0.146, loss_ctc=74.146, loss_att=54.219, acc=0.730, loss=60.197, backward_time=1.028, grad_norm=119.101, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:15:58,492 (trainer:732) INFO: 53epoch:train:7901-8000batch: iter_time=1.298e-04, forward_time=0.147, loss_ctc=64.820, loss_att=50.432, acc=0.736, loss=54.748, backward_time=1.029, grad_norm=136.726, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 13:18:14,196 (trainer:732) INFO: 53epoch:train:8001-8100batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=65.159, loss_att=46.648, acc=0.739, loss=52.201, backward_time=1.027, grad_norm=129.032, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:20:30,190 (trainer:732) INFO: 53epoch:train:8101-8200batch: iter_time=1.194e-04, forward_time=0.148, loss_ctc=67.450, loss_att=49.032, acc=0.722, loss=54.557, backward_time=1.029, grad_norm=149.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 13:22:47,766 (trainer:732) INFO: 53epoch:train:8201-8300batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=69.780, loss_att=53.305, acc=0.725, loss=58.248, backward_time=1.029, grad_norm=135.432, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.911e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 13:23:44,787 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 13:24:02,966 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 13:24:06,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:30:26,163 (trainer:732) INFO: 53epoch:train:8301-8400batch: iter_time=2.133, forward_time=0.174, loss_ctc=68.152, loss_att=51.251, acc=0.719, loss=56.321, backward_time=1.042, grad_norm=127.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.911e-05, train_time=9.168
+[gpub001:0/64] 2023-07-15 13:32:43,842 (trainer:732) INFO: 53epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=75.378, loss_att=53.951, acc=0.719, loss=60.379, backward_time=1.030, grad_norm=120.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 13:35:00,654 (trainer:732) INFO: 53epoch:train:8501-8600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=63.746, loss_att=45.123, acc=0.736, loss=50.710, backward_time=1.027, grad_norm=118.053, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 13:37:16,881 (trainer:732) INFO: 53epoch:train:8601-8700batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=72.822, loss_att=55.363, acc=0.722, loss=60.601, backward_time=1.029, grad_norm=139.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:39:32,720 (trainer:732) INFO: 53epoch:train:8701-8800batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=62.997, loss_att=46.111, acc=0.740, loss=51.177, backward_time=1.028, grad_norm=115.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 13:41:48,463 (trainer:732) INFO: 53epoch:train:8801-8900batch: iter_time=1.169e-04, forward_time=0.147, loss_ctc=66.723, loss_att=46.736, acc=0.734, loss=52.732, backward_time=1.028, grad_norm=102.729, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 13:44:04,390 (trainer:732) INFO: 53epoch:train:8901-9000batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=65.966, loss_att=49.188, acc=0.724, loss=54.222, backward_time=1.029, grad_norm=110.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 13:46:20,504 (trainer:732) INFO: 53epoch:train:9001-9100batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.534, loss_att=51.762, acc=0.723, loss=56.493, backward_time=1.030, grad_norm=127.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 13:47:59,767 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 13:48:18,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 13:48:21,462 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:52:48,491 (trainer:732) INFO: 53epoch:train:9101-9200batch: iter_time=1.496, forward_time=0.181, loss_ctc=70.548, loss_att=52.593, acc=0.730, loss=57.980, backward_time=1.037, grad_norm=137.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.907e-05, train_time=7.758
+[gpub001:0/64] 2023-07-15 13:55:08,582 (trainer:732) INFO: 53epoch:train:9201-9300batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=71.911, loss_att=54.358, acc=0.714, loss=59.624, backward_time=1.034, grad_norm=117.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.907e-05, train_time=2.803
+[gpub001:0/64] 2023-07-15 13:57:24,812 (trainer:732) INFO: 53epoch:train:9301-9400batch: iter_time=1.046e-04, forward_time=0.144, loss_ctc=75.774, loss_att=53.635, acc=0.711, loss=60.277, backward_time=1.026, grad_norm=116.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:59:47,479 (trainer:732) INFO: 53epoch:train:9401-9500batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=63.534, loss_att=44.227, acc=0.735, loss=50.019, backward_time=1.037, grad_norm=112.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:02:07,901 (trainer:732) INFO: 53epoch:train:9501-9600batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=75.989, loss_att=57.625, acc=0.716, loss=63.134, backward_time=1.035, grad_norm=128.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.808
+[gpub001:0/64] 2023-07-15 14:04:30,560 (trainer:732) INFO: 53epoch:train:9601-9700batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.849, loss_att=47.746, acc=0.733, loss=52.877, backward_time=1.032, grad_norm=145.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:06:49,135 (trainer:732) INFO: 53epoch:train:9701-9800batch: iter_time=1.010e-04, forward_time=0.145, loss_ctc=64.302, loss_att=46.018, acc=0.737, loss=51.503, backward_time=1.028, grad_norm=115.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.771
+[gpub001:0/64] 2023-07-15 14:09:06,265 (trainer:732) INFO: 53epoch:train:9801-9900batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=70.320, loss_att=50.257, acc=0.719, loss=56.276, backward_time=1.032, grad_norm=123.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 14:11:24,322 (trainer:732) INFO: 53epoch:train:9901-10000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=62.892, loss_att=47.066, acc=0.722, loss=51.814, backward_time=1.028, grad_norm=133.856, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.903e-05, train_time=2.761
+[gpub001:0/64] 2023-07-15 14:25:24,779 (trainer:338) INFO: 53epoch results: [train] iter_time=0.246, forward_time=0.155, loss_ctc=69.208, loss_att=50.743, acc=0.723, loss=56.282, backward_time=1.033, grad_norm=130.642, clip=100.000, loss_scale=2.290e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=3.453, time=4 hours, 48 minutes and 11.54 seconds, total_count=500000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.074, cer_ctc=0.248, loss_att=38.316, acc=0.682, cer=0.391, wer=0.994, loss=39.744, time=7 minutes and 22.59 seconds, total_count=51106, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 14.16 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 14:25:40,654 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/48epoch.pth
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:272) INFO: 54/60epoch started. Estimated time to finish: 1 day, 10 hours and 52 minutes
+[gpub001:0/64] 2023-07-15 14:25:40,788 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
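Note: the logged optim0_lr0 decays smoothly across epochs (4.951e-05 at total_count=490000, 4.903e-05 at total_count=500000), which is consistent with the inverse-square-root warmup schedule suggested by "lr2.5e-4_warmup10k" in the experiment name: well past warmup, lr is roughly k / sqrt(step). A quick consistency check on those two epoch-boundary points:

    import math
    # (total_count, optim0_lr0) at the 52epoch and 53epoch boundaries
    points = [(490000, 4.951e-05), (500000, 4.903e-05)]
    k = [lr * math.sqrt(step) for step, lr in points]
    print(k)  # both ~0.0347, i.e. lr tracks step**-0.5 to within ~0.04%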
+[gpub001:0/64] 2023-07-15 14:25:59,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 14:26:03,051 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:32:22,669 (trainer:732) INFO: 54epoch:train:1-100batch: iter_time=2.563, forward_time=0.175, loss_ctc=63.502, loss_att=48.578, acc=0.708, loss=53.055, backward_time=1.049, grad_norm=140.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=4.903e-05, train_time=8.037
+[gpub001:0/64] 2023-07-15 14:34:38,802 (trainer:732) INFO: 54epoch:train:101-200batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.392, loss_att=47.095, acc=0.700, loss=51.684, backward_time=1.029, grad_norm=120.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 14:36:54,626 (trainer:732) INFO: 54epoch:train:201-300batch: iter_time=9.869e-05, forward_time=0.145, loss_ctc=72.046, loss_att=53.141, acc=0.703, loss=58.812, backward_time=1.028, grad_norm=147.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 14:39:10,400 (trainer:732) INFO: 54epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.144, loss_ctc=68.492, loss_att=54.163, acc=0.695, loss=58.462, backward_time=1.028, grad_norm=132.331, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 14:41:40,557 (trainer:732) INFO: 54epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=74.081, loss_att=54.631, acc=0.712, loss=60.466, backward_time=1.039, grad_norm=142.817, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=3.003
+[gpub001:0/64] 2023-07-15 14:44:08,475 (trainer:732) INFO: 54epoch:train:501-600batch: iter_time=1.074e-04, forward_time=0.146, loss_ctc=75.789, loss_att=56.471, acc=0.714, loss=62.266, backward_time=1.041, grad_norm=129.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=2.958
+[gpub001:0/64] 2023-07-15 14:46:25,669 (trainer:732) INFO: 54epoch:train:601-700batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=80.391, loss_att=63.078, acc=0.698, loss=68.272, backward_time=1.030, grad_norm=144.568, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.744
+[gpub001:0/64] 2023-07-15 14:48:45,282 (trainer:732) INFO: 54epoch:train:701-800batch: iter_time=9.712e-05, forward_time=0.144, loss_ctc=76.898, loss_att=54.873, acc=0.711, loss=61.480, backward_time=1.034, grad_norm=129.027, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.792
+[gpub001:0/64] 2023-07-15 14:49:40,812 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 14:49:58,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 14:50:02,155 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:55:15,387 (trainer:732) INFO: 54epoch:train:801-900batch: iter_time=1.314, forward_time=0.146, loss_ctc=67.481, loss_att=53.521, acc=0.714, loss=57.709, backward_time=1.050, grad_norm=123.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=7.802
+[gpub001:0/64] 2023-07-15 14:57:33,121 (trainer:732) INFO: 54epoch:train:901-1000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=61.392, loss_att=45.351, acc=0.703, loss=50.163, backward_time=1.028, grad_norm=131.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=2.754
+[gpub001:0/64] 2023-07-15 14:59:32,594 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
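Note: the "grad norm is nan" warnings are the visible half of mixed-precision dynamic loss scaling: when the scaled gradients overflow, the optimizer update is skipped and the loss scale is cut, then grows back after a stretch of finite steps. That matches how loss_scale moves here (the per-window average drops from 3.245e+32 toward 1.623e+32 after this skip, and to 8.113e+31 after the next one, having doubled its way up earlier). A generic sketch of that loop (ESPnet delegates this to torch.cuda.amp.GradScaler; the class below is illustrative only):

    # Illustrative dynamic loss scaling: halve the scale on overflow and skip
    # the step; double it again after a run of overflow-free steps.
    class LossScaler:
        def __init__(self, scale=2.0 ** 15, growth_interval=2000):
            self.scale = scale
            self.growth_interval = growth_interval
            self._good_steps = 0

        def step(self, grads_are_finite: bool) -> bool:
            if not grads_are_finite:
                self.scale /= 2.0          # e.g. 3.245e+32 -> 1.623e+32
                self._good_steps = 0
                return False               # optimizer update is skipped
            self._good_steps += 1
            if self._good_steps >= self.growth_interval:
                self.scale *= 2.0
                self._good_steps = 0
            return True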
+[gpub001:0/64] 2023-07-15 14:59:48,916 (trainer:732) INFO: 54epoch:train:1001-1100batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=65.249, loss_att=46.994, acc=0.724, loss=52.470, backward_time=1.028, grad_norm=120.404, clip=100.000, loss_scale=3.047e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 15:02:05,028 (trainer:732) INFO: 54epoch:train:1101-1200batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=68.386, loss_att=53.369, acc=0.710, loss=57.874, backward_time=1.030, grad_norm=121.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 15:04:21,637 (trainer:732) INFO: 54epoch:train:1201-1300batch: iter_time=1.120e-04, forward_time=0.148, loss_ctc=72.716, loss_att=55.502, acc=0.716, loss=60.666, backward_time=1.033, grad_norm=135.679, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 15:06:37,386 (trainer:732) INFO: 54epoch:train:1301-1400batch: iter_time=1.122e-04, forward_time=0.147, loss_ctc=70.008, loss_att=51.346, acc=0.719, loss=56.945, backward_time=1.029, grad_norm=128.324, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 15:08:54,748 (trainer:732) INFO: 54epoch:train:1401-1500batch: iter_time=1.246e-04, forward_time=0.149, loss_ctc=83.968, loss_att=69.519, acc=0.699, loss=73.854, backward_time=1.032, grad_norm=138.800, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 15:11:10,946 (trainer:732) INFO: 54epoch:train:1501-1600batch: iter_time=1.227e-04, forward_time=0.148, loss_ctc=72.417, loss_att=52.717, acc=0.726, loss=58.627, backward_time=1.031, grad_norm=125.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 15:12:42,398 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 15:13:00,574 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 15:13:04,265 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 15:17:27,833 (trainer:732) INFO: 54epoch:train:1601-1700batch: iter_time=1.373, forward_time=0.166, loss_ctc=71.162, loss_att=54.576, acc=0.714, loss=59.552, backward_time=1.038, grad_norm=130.731, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.895e-05, train_time=7.537
+[gpub001:0/64] 2023-07-15 15:19:44,876 (trainer:732) INFO: 54epoch:train:1701-1800batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=57.674, loss_att=42.891, acc=0.710, loss=47.326, backward_time=1.033, grad_norm=146.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.895e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 15:22:00,770 (trainer:732) INFO: 54epoch:train:1801-1900batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=69.982, loss_att=52.765, acc=0.709, loss=57.930, backward_time=1.029, grad_norm=110.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 15:24:16,473 (trainer:732) INFO: 54epoch:train:1901-2000batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=68.795, loss_att=50.053, acc=0.721, loss=55.676, backward_time=1.027, grad_norm=128.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 15:26:32,120 (trainer:732) INFO: 54epoch:train:2001-2100batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=71.032, loss_att=54.634, acc=0.702, loss=59.553, backward_time=1.026, grad_norm=146.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 15:28:47,771 (trainer:732) INFO: 54epoch:train:2101-2200batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=69.011, loss_att=51.902, acc=0.714, loss=57.035, backward_time=1.027, grad_norm=146.707, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 15:31:09,057 (trainer:732) INFO: 54epoch:train:2201-2300batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=77.368, loss_att=56.839, acc=0.720, loss=62.997, backward_time=1.037, grad_norm=146.981, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.826
+[gpub001:0/64] 2023-07-15 15:33:27,573 (trainer:732) INFO: 54epoch:train:2301-2400batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=72.933, loss_att=60.662, acc=0.700, loss=64.343, backward_time=1.031, grad_norm=140.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.892e-05, train_time=2.770
+[gpub001:0/64] 2023-07-15 15:36:01,415 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 15:36:19,553 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 15:36:23,193 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 15:36:23,194 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 15:36:23,200 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 15:39:55,199 (trainer:732) INFO: 54epoch:train:2401-2500batch: iter_time=2.445, forward_time=0.145, loss_ctc=74.369, loss_att=54.412, acc=0.716, loss=60.399, backward_time=1.038, grad_norm=132.718, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.892e-05, train_time=7.752
+[gpub001:0/64] 2023-07-15 15:42:12,710 (trainer:732) INFO: 54epoch:train:2501-2600batch: iter_time=1.524e-04, forward_time=0.147, loss_ctc=59.869, loss_att=46.039, acc=0.696, loss=50.188, backward_time=1.033, grad_norm=131.162, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 15:44:29,535 (trainer:732) INFO: 54epoch:train:2601-2700batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=65.703, loss_att=49.009, acc=0.712, loss=54.017, backward_time=1.025, grad_norm=128.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 15:46:45,197 (trainer:732) INFO: 54epoch:train:2701-2800batch: iter_time=1.495e-04, forward_time=0.147, loss_ctc=68.702, loss_att=53.287, acc=0.707, loss=57.911, backward_time=1.027, grad_norm=154.384, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 15:49:01,013 (trainer:732) INFO: 54epoch:train:2801-2900batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=73.798, loss_att=55.447, acc=0.706, loss=60.952, backward_time=1.029, grad_norm=127.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 15:49:11,705 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 15:51:16,520 (trainer:732) INFO: 54epoch:train:2901-3000batch: iter_time=1.349e-04, forward_time=0.146, loss_ctc=67.917, loss_att=49.080, acc=0.724, loss=54.731, backward_time=1.029, grad_norm=136.985, clip=100.000, loss_scale=8.610e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 15:53:32,664 (trainer:732) INFO: 54epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.148, loss_ctc=81.457, loss_att=61.670, acc=0.702, loss=67.606, backward_time=1.030, grad_norm=170.754, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 15:55:52,060 (trainer:732) INFO: 54epoch:train:3101-3200batch: iter_time=1.276e-04, forward_time=0.147, loss_ctc=74.030, loss_att=59.359, acc=0.705, loss=63.760, backward_time=1.030, grad_norm=138.690, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.788 +[gpub001:0/64] 2023-07-15 15:58:14,921 (trainer:732) INFO: 54epoch:train:3201-3300batch: iter_time=1.442e-04, forward_time=0.146, loss_ctc=71.516, loss_att=52.026, acc=0.713, loss=57.873, backward_time=1.035, grad_norm=134.585, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.857 +[gpub001:0/64] 2023-07-15 15:59:06,473 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 15:59:24,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 15:59:28,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 16:04:12,014 (trainer:732) INFO: 54epoch:train:3301-3400batch: iter_time=1.395, forward_time=0.180, loss_ctc=67.158, loss_att=49.393, acc=0.713, loss=54.722, backward_time=1.042, grad_norm=131.945, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=7.141 +[gpub001:0/64] 2023-07-15 16:06:28,728 (trainer:732) INFO: 54epoch:train:3401-3500batch: iter_time=9.250e-05, forward_time=0.146, loss_ctc=65.344, loss_att=46.346, acc=0.721, loss=52.046, backward_time=1.028, grad_norm=126.382, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 16:08:45,609 (trainer:732) INFO: 54epoch:train:3501-3600batch: iter_time=9.346e-05, forward_time=0.147, loss_ctc=69.324, loss_att=54.156, acc=0.716, loss=58.706, backward_time=1.030, grad_norm=127.329, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.737 +[gpub001:0/64] 2023-07-15 16:11:02,343 (trainer:732) INFO: 54epoch:train:3601-3700batch: iter_time=9.335e-05, forward_time=0.146, loss_ctc=68.249, loss_att=52.531, acc=0.713, loss=57.246, 
backward_time=1.034, grad_norm=131.595, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 16:13:18,306 (trainer:732) INFO: 54epoch:train:3701-3800batch: iter_time=9.645e-05, forward_time=0.146, loss_ctc=67.231, loss_att=51.222, acc=0.716, loss=56.025, backward_time=1.030, grad_norm=145.759, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 16:15:35,928 (trainer:732) INFO: 54epoch:train:3801-3900batch: iter_time=9.782e-05, forward_time=0.146, loss_ctc=76.405, loss_att=57.298, acc=0.718, loss=63.030, backward_time=1.030, grad_norm=118.733, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.752 +[gpub001:0/64] 2023-07-15 16:17:53,102 (trainer:732) INFO: 54epoch:train:3901-4000batch: iter_time=9.738e-05, forward_time=0.146, loss_ctc=77.861, loss_att=59.602, acc=0.715, loss=65.080, backward_time=1.032, grad_norm=143.778, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.743 +[gpub001:0/64] 2023-07-15 16:20:12,772 (trainer:732) INFO: 54epoch:train:4001-4100batch: iter_time=9.171e-05, forward_time=0.146, loss_ctc=71.078, loss_att=52.212, acc=0.724, loss=57.872, backward_time=1.031, grad_norm=122.391, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.884e-05, train_time=2.793 +[gpub001:0/64] 2023-07-15 16:21:56,724 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 16:22:14,655 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 16:22:18,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 16:29:57,478 (trainer:732) INFO: 54epoch:train:4101-4200batch: iter_time=4.421, forward_time=0.186, loss_ctc=72.658, loss_att=54.839, acc=0.713, loss=60.185, backward_time=1.041, grad_norm=113.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.884e-05, train_time=11.694 +[gpub001:0/64] 2023-07-15 16:32:14,169 (trainer:732) INFO: 54epoch:train:4201-4300batch: iter_time=1.315e-04, forward_time=0.149, loss_ctc=62.009, loss_att=45.248, acc=0.711, loss=50.276, backward_time=1.028, grad_norm=132.170, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.883e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 16:34:31,503 (trainer:732) INFO: 54epoch:train:4301-4400batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.740, loss_att=51.311, acc=0.716, loss=56.540, backward_time=1.028, grad_norm=148.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=4.883e-05, train_time=2.746 +[gpub001:0/64] 2023-07-15 16:36:47,096 (trainer:732) 
INFO: 54epoch:train:4401-4500batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.509, loss_att=49.340, acc=0.715, loss=53.891, backward_time=1.026, grad_norm=142.094, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.882e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 16:39:14,472 (trainer:732) INFO: 54epoch:train:4501-4600batch: iter_time=5.487e-04, forward_time=0.188, loss_ctc=71.996, loss_att=52.884, acc=0.721, loss=58.617, backward_time=1.057, grad_norm=114.514, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.191, optim0_lr0=4.882e-05, train_time=2.945 +[gpub001:0/64] 2023-07-15 16:41:40,016 (trainer:732) INFO: 54epoch:train:4601-4700batch: iter_time=1.099e-04, forward_time=0.216, loss_ctc=69.267, loss_att=53.635, acc=0.719, loss=58.325, backward_time=1.040, grad_norm=162.003, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.881e-05, train_time=2.913 +[gpub001:0/64] 2023-07-15 16:43:56,617 (trainer:732) INFO: 54epoch:train:4701-4800batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=79.072, loss_att=61.795, acc=0.708, loss=66.978, backward_time=1.032, grad_norm=130.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.881e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 16:46:13,780 (trainer:732) INFO: 54epoch:train:4801-4900batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=75.323, loss_att=51.827, acc=0.719, loss=58.876, backward_time=1.030, grad_norm=137.087, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.743 +[gpub001:0/64] 2023-07-15 16:48:31,776 (trainer:732) INFO: 54epoch:train:4901-5000batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=74.927, loss_att=57.150, acc=0.708, loss=62.483, backward_time=1.033, grad_norm=132.363, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.760 +[gpub001:0/64] 2023-07-15 16:48:53,054 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/64] 2023-07-15 16:49:11,204 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 16:49:14,723 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 16:55:54,724 (trainer:732) INFO: 54epoch:train:5001-5100batch: iter_time=2.962, forward_time=0.147, loss_ctc=58.843, loss_att=46.145, acc=0.708, loss=49.955, backward_time=1.045, grad_norm=115.373, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=8.859 +[gpub001:0/64] 2023-07-15 16:58:11,635 (trainer:732) INFO: 54epoch:train:5101-5200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=66.317, loss_att=46.348, acc=0.724, loss=52.339, backward_time=1.031, grad_norm=131.819, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 17:00:27,591 (trainer:732) INFO: 54epoch:train:5201-5300batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=67.278, loss_att=51.549, acc=0.714, loss=56.268, backward_time=1.029, grad_norm=135.720, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 17:02:43,684 (trainer:732) INFO: 54epoch:train:5301-5400batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=73.708, loss_att=57.069, acc=0.716, loss=62.061, backward_time=1.030, grad_norm=153.196, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 17:05:10,383 (trainer:732) INFO: 54epoch:train:5401-5500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=68.066, loss_att=48.978, acc=0.724, loss=54.704, backward_time=1.038, grad_norm=142.518, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.934 +[gpub001:0/64] 2023-07-15 17:07:31,542 (trainer:732) INFO: 54epoch:train:5501-5600batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=79.121, loss_att=60.754, acc=0.714, loss=66.264, backward_time=1.037, grad_norm=156.564, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.823 +[gpub001:0/64] 2023-07-15 17:09:48,129 (trainer:732) INFO: 54epoch:train:5601-5700batch: iter_time=1.147e-04, forward_time=0.148, loss_ctc=71.339, loss_att=55.929, acc=0.723, loss=60.552, backward_time=1.033, grad_norm=154.635, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 17:12:04,010 (trainer:732) INFO: 54epoch:train:5701-5800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.126, loss_att=54.273, acc=0.721, loss=59.629, backward_time=1.028, grad_norm=132.905, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, 
optim0_lr0=4.876e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 17:12:54,371 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 17:13:12,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-15 17:13:15,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 17:18:33,665 (trainer:732) INFO: 54epoch:train:5801-5900batch: iter_time=1.397, forward_time=0.233, loss_ctc=70.879, loss_att=52.656, acc=0.723, loss=58.123, backward_time=1.073, grad_norm=140.446, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=4.876e-05, train_time=7.793 +[gpub001:0/64] 2023-07-15 17:21:02,892 (trainer:732) INFO: 54epoch:train:5901-6000batch: iter_time=1.398e-04, forward_time=0.165, loss_ctc=60.858, loss_att=44.904, acc=0.700, loss=49.690, backward_time=1.040, grad_norm=132.788, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.875e-05, train_time=2.984 +[gpub001:0/64] 2023-07-15 17:23:19,553 (trainer:732) INFO: 54epoch:train:6001-6100batch: iter_time=1.350e-04, forward_time=0.145, loss_ctc=64.750, loss_att=48.839, acc=0.720, loss=53.613, backward_time=1.028, grad_norm=136.365, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.875e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 17:25:35,434 (trainer:732) INFO: 54epoch:train:6101-6200batch: iter_time=1.440e-04, forward_time=0.147, loss_ctc=66.947, loss_att=50.736, acc=0.710, loss=55.599, backward_time=1.026, grad_norm=127.831, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=4.874e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 17:27:59,502 (trainer:732) INFO: 54epoch:train:6201-6300batch: iter_time=1.252e-04, forward_time=0.147, loss_ctc=71.889, loss_att=54.562, acc=0.718, loss=59.761, backward_time=1.036, grad_norm=116.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.881 +[gpub001:0/64] 2023-07-15 17:30:18,402 (trainer:732) INFO: 54epoch:train:6301-6400batch: iter_time=1.250e-04, forward_time=0.147, loss_ctc=66.848, loss_att=48.903, acc=0.725, loss=54.286, backward_time=1.032, grad_norm=118.817, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.778 +[gpub001:0/64] 2023-07-15 17:32:34,639 (trainer:732) INFO: 54epoch:train:6401-6500batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=79.035, loss_att=65.024, acc=0.701, loss=69.227, backward_time=1.030, grad_norm=146.629, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 17:34:53,111 (trainer:732) INFO: 54epoch:train:6501-6600batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=73.072, 
loss_att=54.719, acc=0.716, loss=60.225, backward_time=1.030, grad_norm=141.338, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.769 +[gpub001:0/64] 2023-07-15 17:36:40,428 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub001:0/64] 2023-07-15 17:36:58,440 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 17:37:01,910 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 17:37:01,911 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 17:37:01,917 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 17:41:31,451 (trainer:732) INFO: 54epoch:train:6601-6700batch: iter_time=1.482, forward_time=0.207, loss_ctc=70.480, loss_att=52.884, acc=0.716, loss=58.163, backward_time=1.044, grad_norm=119.526, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.872e-05, train_time=7.967 +[gpub001:0/64] 2023-07-15 17:43:48,493 (trainer:732) INFO: 54epoch:train:6701-6800batch: iter_time=1.090e-04, forward_time=0.148, loss_ctc=56.961, loss_att=42.537, acc=0.721, loss=46.864, backward_time=1.031, grad_norm=127.255, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.872e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 17:46:04,798 (trainer:732) INFO: 54epoch:train:6801-6900batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=66.029, loss_att=48.816, acc=0.717, loss=53.980, backward_time=1.029, grad_norm=110.226, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 17:48:20,547 (trainer:732) INFO: 54epoch:train:6901-7000batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=68.985, loss_att=52.724, acc=0.718, loss=57.602, backward_time=1.026, grad_norm=155.820, clip=100.000, loss_scale=1.558e+32, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 17:50:36,889 (trainer:732) INFO: 54epoch:train:7001-7100batch: iter_time=1.080e-04, forward_time=0.147, loss_ctc=69.839, loss_att=54.243, acc=0.714, loss=58.922, backward_time=1.029, grad_norm=144.642, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 17:52:52,346 (trainer:732) INFO: 54epoch:train:7101-7200batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.013, loss_att=51.423, acc=0.715, loss=56.100, backward_time=1.025, grad_norm=131.762, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 17:55:09,258 (trainer:732) INFO: 54epoch:train:7201-7300batch: iter_time=1.155e-04, forward_time=0.148, loss_ctc=73.813, loss_att=55.438, acc=0.729, loss=60.950, backward_time=1.031, grad_norm=134.728, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.738 +[gpub001:0/64] 
2023-07-15 17:57:25,341 (trainer:732) INFO: 54epoch:train:7301-7400batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=77.874, loss_att=60.697, acc=0.708, loss=65.850, backward_time=1.030, grad_norm=138.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 17:59:41,257 (trainer:732) INFO: 54epoch:train:7401-7500batch: iter_time=1.076e-04, forward_time=0.147, loss_ctc=72.240, loss_att=53.009, acc=0.727, loss=58.778, backward_time=1.029, grad_norm=151.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 17:59:45,954 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 18:00:04,194 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 18:00:07,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 18:05:23,755 (trainer:732) INFO: 54epoch:train:7501-7600batch: iter_time=1.578, forward_time=0.148, loss_ctc=63.070, loss_att=46.075, acc=0.730, loss=51.173, backward_time=1.046, grad_norm=104.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=6.850 +[gpub001:0/64] 2023-07-15 18:07:39,960 (trainer:732) INFO: 54epoch:train:7601-7700batch: iter_time=1.174e-04, forward_time=0.147, loss_ctc=61.589, loss_att=43.882, acc=0.723, loss=49.194, backward_time=1.027, grad_norm=131.469, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 18:09:56,626 (trainer:732) INFO: 54epoch:train:7701-7800batch: iter_time=1.127e-04, forward_time=0.149, loss_ctc=67.589, loss_att=49.630, acc=0.721, loss=55.018, backward_time=1.029, grad_norm=119.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 18:12:12,863 (trainer:732) INFO: 54epoch:train:7801-7900batch: iter_time=1.177e-04, forward_time=0.147, loss_ctc=68.700, loss_att=54.220, acc=0.708, loss=58.564, backward_time=1.028, grad_norm=137.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 18:14:41,639 (trainer:732) INFO: 54epoch:train:7901-8000batch: iter_time=5.809e-04, forward_time=0.239, loss_ctc=69.066, loss_att=52.174, acc=0.726, loss=57.241, backward_time=1.045, grad_norm=143.195, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.866e-05, train_time=2.975 +[gpub001:0/64] 2023-07-15 18:17:01,347 (trainer:732) INFO: 54epoch:train:8001-8100batch: iter_time=1.186e-04, forward_time=0.170, loss_ctc=74.493, loss_att=53.218, acc=0.731, loss=59.601, backward_time=1.032, 
grad_norm=137.223, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.866e-05, train_time=2.794 +[gpub001:0/64] 2023-07-15 18:19:39,609 (trainer:732) INFO: 54epoch:train:8101-8200batch: iter_time=1.190e-04, forward_time=0.154, loss_ctc=76.741, loss_att=61.285, acc=0.712, loss=65.922, backward_time=1.056, grad_norm=126.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=3.165 +[gpub001:0/64] 2023-07-15 18:21:56,191 (trainer:732) INFO: 54epoch:train:8201-8300batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=74.008, loss_att=52.223, acc=0.727, loss=58.758, backward_time=1.032, grad_norm=115.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=2.731 +[gpub001:0/64] 2023-07-15 18:23:02,635 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 18:23:20,841 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 18:23:24,303 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 18:29:23,956 (trainer:732) INFO: 54epoch:train:8301-8400batch: iter_time=2.345, forward_time=0.153, loss_ctc=64.437, loss_att=50.931, acc=0.730, loss=54.983, backward_time=1.080, grad_norm=113.050, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=8.955 +[gpub001:0/64] 2023-07-15 18:31:53,086 (trainer:732) INFO: 54epoch:train:8401-8500batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=60.709, loss_att=44.071, acc=0.715, loss=49.063, backward_time=1.058, grad_norm=119.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=2.982 +[gpub001:0/64] 2023-07-15 18:34:12,345 (trainer:732) INFO: 54epoch:train:8501-8600batch: iter_time=1.015e-04, forward_time=0.146, loss_ctc=63.847, loss_att=44.978, acc=0.734, loss=50.639, backward_time=1.044, grad_norm=106.758, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 18:36:33,395 (trainer:732) INFO: 54epoch:train:8601-8700batch: iter_time=1.047e-04, forward_time=0.145, loss_ctc=67.021, loss_att=51.807, acc=0.717, loss=56.371, backward_time=1.036, grad_norm=147.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.821 +[gpub001:0/64] 2023-07-15 18:38:52,658 (trainer:732) INFO: 54epoch:train:8701-8800batch: iter_time=9.446e-05, forward_time=0.146, loss_ctc=71.996, loss_att=54.637, acc=0.721, loss=59.845, backward_time=1.031, grad_norm=134.907, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 18:41:08,460 (trainer:732) INFO: 
54epoch:train:8801-8900batch: iter_time=9.579e-05, forward_time=0.147, loss_ctc=67.011, loss_att=49.373, acc=0.731, loss=54.664, backward_time=1.028, grad_norm=118.447, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 18:43:31,785 (trainer:732) INFO: 54epoch:train:8901-9000batch: iter_time=9.115e-05, forward_time=0.147, loss_ctc=80.349, loss_att=63.204, acc=0.712, loss=68.347, backward_time=1.040, grad_norm=116.323, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.866 +[gpub001:0/64] 2023-07-15 18:45:47,919 (trainer:732) INFO: 54epoch:train:9001-9100batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=72.525, loss_att=52.621, acc=0.728, loss=58.592, backward_time=1.031, grad_norm=131.623, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.861e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 18:47:20,788 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub001:0/64] 2023-07-15 18:47:38,830 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 18:47:42,522 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 18:51:38,331 (trainer:732) INFO: 54epoch:train:9101-9200batch: iter_time=1.531, forward_time=0.173, loss_ctc=69.490, loss_att=52.765, acc=0.723, loss=57.782, backward_time=1.037, grad_norm=124.708, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.861e-05, train_time=7.008 +[gpub001:0/64] 2023-07-15 18:53:54,968 (trainer:732) INFO: 54epoch:train:9201-9300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=56.277, loss_att=41.943, acc=0.713, loss=46.244, backward_time=1.031, grad_norm=107.239, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 18:56:11,597 (trainer:732) INFO: 54epoch:train:9301-9400batch: iter_time=1.083e-04, forward_time=0.147, loss_ctc=66.657, loss_att=51.226, acc=0.713, loss=55.856, backward_time=1.029, grad_norm=120.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 18:58:28,213 (trainer:732) INFO: 54epoch:train:9401-9500batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=68.048, loss_att=49.821, acc=0.723, loss=55.289, backward_time=1.032, grad_norm=148.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 19:00:44,029 (trainer:732) INFO: 54epoch:train:9501-9600batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=70.255, loss_att=54.181, acc=0.708, loss=59.003, backward_time=1.028, grad_norm=132.345, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 19:03:00,419 (trainer:732) INFO: 54epoch:train:9601-9700batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=66.069, loss_att=50.762, acc=0.716, loss=55.354, backward_time=1.028, grad_norm=135.698, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.728 +[gpub001:0/64] 2023-07-15 19:05:16,211 (trainer:732) INFO: 54epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=76.996, loss_att=58.173, acc=0.719, loss=63.820, backward_time=1.029, grad_norm=132.925, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 19:07:32,296 (trainer:732) INFO: 54epoch:train:9801-9900batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=71.876, loss_att=58.022, acc=0.710, loss=62.178, backward_time=1.030, grad_norm=155.420, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 19:09:47,953 (trainer:732) INFO: 54epoch:train:9901-10000batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.341, loss_att=53.585, acc=0.723, loss=59.212, backward_time=1.026, grad_norm=135.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 19:23:03,469 (trainer:338) INFO: 54epoch results: [train] iter_time=0.248, forward_time=0.152, loss_ctc=69.871, loss_att=52.737, acc=0.715, loss=57.878, backward_time=1.034, grad_norm=133.170, clip=100.000, loss_scale=1.474e+32, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=3.409, time=4 hours, 44 minutes and 19.81 seconds, total_count=510000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=41.546, cer_ctc=0.245, loss_att=36.383, acc=0.679, cer=0.423, wer=1.000, loss=37.932, time=7 minutes and 4.7 seconds, total_count=52118, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 58.2 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-15 19:23:19,345 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/49epoch.pth +[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:272) INFO: 55/60epoch started. Estimated time to finish: 1 day, 5 hours and 52 minutes +[gpub001:0/64] 2023-07-15 19:23:19,377 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/64] 2023-07-15 19:23:37,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 19:23:40,341 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 19:31:02,897 (trainer:732) INFO: 55epoch:train:1-100batch: iter_time=3.212, forward_time=0.179, loss_ctc=66.161, loss_att=47.370, acc=0.712, loss=53.007, backward_time=1.042, grad_norm=114.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.856e-05, train_time=9.270 +[gpub001:0/64] 2023-07-15 19:33:19,196 (trainer:732) INFO: 55epoch:train:101-200batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=80.026, loss_att=57.239, acc=0.710, loss=64.075, backward_time=1.030, grad_norm=155.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.856e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 19:35:36,675 (trainer:732) INFO: 55epoch:train:201-300batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=70.409, loss_att=49.481, acc=0.714, loss=55.759, backward_time=1.028, grad_norm=126.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.856e-05, train_time=2.749 +[gpub001:0/64] 2023-07-15 19:37:52,657 (trainer:732) INFO: 55epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=73.324, loss_att=56.085, acc=0.696, loss=61.256, backward_time=1.028, grad_norm=136.348, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 19:40:11,554 (trainer:732) INFO: 55epoch:train:401-500batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=68.927, loss_att=51.897, acc=0.701, loss=57.006, backward_time=1.027, grad_norm=132.079, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.778 +[gpub001:0/64] 2023-07-15 19:42:27,271 (trainer:732) INFO: 55epoch:train:501-600batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=70.748, loss_att=54.161, acc=0.713, loss=59.137, backward_time=1.026, grad_norm=129.751, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 19:44:43,089 (trainer:732) INFO: 55epoch:train:601-700batch: iter_time=1.243e-04, forward_time=0.146, loss_ctc=70.260, loss_att=56.700, acc=0.709, loss=60.768, backward_time=1.027, grad_norm=134.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 19:47:02,647 (trainer:732) INFO: 55epoch:train:701-800batch: iter_time=1.169e-04, forward_time=0.159, loss_ctc=61.315, loss_att=47.512, acc=0.708, loss=51.653, backward_time=1.032, grad_norm=119.787, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.853e-05, 
train_time=2.791 +[gpub001:0/64] 2023-07-15 19:47:56,741 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub001:0/64] 2023-07-15 19:48:14,565 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 19:48:17,934 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 19:52:24,523 (trainer:732) INFO: 55epoch:train:801-900batch: iter_time=1.522, forward_time=0.203, loss_ctc=69.155, loss_att=52.160, acc=0.709, loss=57.258, backward_time=1.045, grad_norm=146.382, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.853e-05, train_time=6.437 +[gpub001:0/64] 2023-07-15 19:54:41,572 (trainer:732) INFO: 55epoch:train:901-1000batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=70.035, loss_att=56.263, acc=0.706, loss=60.395, backward_time=1.032, grad_norm=129.876, clip=100.000, loss_scale=3.115e+32, optim_step_time=0.182, optim0_lr0=4.852e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 19:56:57,064 (trainer:732) INFO: 55epoch:train:1001-1100batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=78.100, loss_att=53.099, acc=0.718, loss=60.599, backward_time=1.025, grad_norm=139.253, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.852e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 19:59:12,943 (trainer:732) INFO: 55epoch:train:1101-1200batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=74.777, loss_att=55.606, acc=0.702, loss=61.357, backward_time=1.027, grad_norm=126.416, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.851e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 20:01:28,825 (trainer:732) INFO: 55epoch:train:1201-1300batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.023, loss_att=53.038, acc=0.705, loss=57.833, backward_time=1.029, grad_norm=128.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 20:03:45,287 (trainer:732) INFO: 55epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.147, loss_ctc=65.828, loss_att=47.995, acc=0.715, loss=53.345, backward_time=1.029, grad_norm=139.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.729 +[gpub001:0/64] 2023-07-15 20:04:15,026 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 20:06:00,814 (trainer:732) INFO: 55epoch:train:1401-1500batch: iter_time=8.046e-04, forward_time=0.145, loss_ctc=70.081, loss_att=54.160, acc=0.716, loss=58.936, backward_time=1.028, grad_norm=161.967, clip=100.000, loss_scale=1.954e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 20:08:16,706 (trainer:732) INFO: 55epoch:train:1501-1600batch: iter_time=1.300e-04, forward_time=0.148, loss_ctc=65.333, loss_att=50.645, acc=0.717, loss=55.052, backward_time=1.028, grad_norm=125.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 20:09:58,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-15 20:10:16,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 20:10:20,108 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 20:15:05,333 (trainer:732) INFO: 55epoch:train:1601-1700batch: iter_time=2.640, forward_time=0.161, loss_ctc=69.772, loss_att=56.145, acc=0.702, loss=60.233, backward_time=1.045, grad_norm=123.498, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.849e-05, train_time=8.172 +[gpub001:0/64] 2023-07-15 20:17:22,257 (trainer:732) INFO: 55epoch:train:1701-1800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=71.715, loss_att=51.174, acc=0.722, loss=57.336, backward_time=1.032, grad_norm=144.148, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.849e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 20:19:38,257 (trainer:732) INFO: 55epoch:train:1801-1900batch: iter_time=1.163e-04, forward_time=0.146, loss_ctc=71.559, loss_att=54.084, acc=0.725, loss=59.327, backward_time=1.029, grad_norm=122.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 20:21:54,506 (trainer:732) INFO: 55epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=72.459, loss_att=49.201, acc=0.735, loss=56.178, backward_time=1.029, grad_norm=122.229, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 20:24:10,321 (trainer:732) INFO: 55epoch:train:2001-2100batch: iter_time=1.121e-04, forward_time=0.146, loss_ctc=72.859, loss_att=55.435, acc=0.707, loss=60.662, backward_time=1.027, grad_norm=135.154, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 20:26:25,933 (trainer:732) INFO: 55epoch:train:2101-2200batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=69.278, loss_att=52.189, acc=0.717, loss=57.316, 
backward_time=1.026, grad_norm=183.531, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 20:28:41,637 (trainer:732) INFO: 55epoch:train:2201-2300batch: iter_time=1.061e-04, forward_time=0.146, loss_ctc=67.008, loss_att=48.170, acc=0.728, loss=53.821, backward_time=1.027, grad_norm=136.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 20:30:57,552 (trainer:732) INFO: 55epoch:train:2301-2400batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=69.173, loss_att=54.867, acc=0.725, loss=59.159, backward_time=1.028, grad_norm=129.937, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 20:33:13,592 (trainer:732) INFO: 55epoch:train:2401-2500batch: iter_time=1.065e-04, forward_time=0.146, loss_ctc=64.746, loss_att=49.739, acc=0.715, loss=54.241, backward_time=1.028, grad_norm=119.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 20:33:16,890 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 20:33:34,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 20:33:38,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 20:39:15,364 (trainer:732) INFO: 55epoch:train:2501-2600batch: iter_time=1.324, forward_time=0.155, loss_ctc=75.159, loss_att=52.705, acc=0.723, loss=59.441, backward_time=1.050, grad_norm=172.779, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=7.235 +[gpub001:0/64] 2023-07-15 20:41:32,183 (trainer:732) INFO: 55epoch:train:2601-2700batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=69.915, loss_att=53.588, acc=0.721, loss=58.486, backward_time=1.031, grad_norm=134.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 20:43:48,482 (trainer:732) INFO: 55epoch:train:2701-2800batch: iter_time=1.217e-04, forward_time=0.147, loss_ctc=76.857, loss_att=50.857, acc=0.733, loss=58.657, backward_time=1.031, grad_norm=125.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 20:46:04,499 (trainer:732) INFO: 55epoch:train:2801-2900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=70.084, loss_att=55.592, acc=0.713, loss=59.939, backward_time=1.030, grad_norm=156.149, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 20:48:20,501 (trainer:732) INFO: 
55epoch:train:2901-3000batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=65.972, loss_att=49.663, acc=0.717, loss=54.556, backward_time=1.030, grad_norm=162.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.843e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 20:50:45,633 (trainer:732) INFO: 55epoch:train:3001-3100batch: iter_time=1.104e-04, forward_time=0.194, loss_ctc=66.366, loss_att=46.152, acc=0.727, loss=52.216, backward_time=1.050, grad_norm=136.185, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.843e-05, train_time=2.901 +[gpub001:0/64] 2023-07-15 20:53:05,403 (trainer:732) INFO: 55epoch:train:3101-3200batch: iter_time=1.162e-04, forward_time=0.173, loss_ctc=72.329, loss_att=57.968, acc=0.722, loss=62.276, backward_time=1.030, grad_norm=129.959, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.842e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 20:55:21,845 (trainer:732) INFO: 55epoch:train:3201-3300batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=60.730, loss_att=48.110, acc=0.725, loss=51.896, backward_time=1.030, grad_norm=110.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.842e-05, train_time=2.729 +[gpub001:0/64] 2023-07-15 20:56:24,324 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 20:56:42,220 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 20:56:45,671 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 21:01:16,198 (trainer:732) INFO: 55epoch:train:3301-3400batch: iter_time=2.037, forward_time=0.159, loss_ctc=65.648, loss_att=46.081, acc=0.723, loss=51.951, backward_time=1.054, grad_norm=141.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.841e-05, train_time=7.087 +[gpub001:0/64] 2023-07-15 21:03:32,649 (trainer:732) INFO: 55epoch:train:3401-3500batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=68.666, loss_att=55.199, acc=0.722, loss=59.239, backward_time=1.030, grad_norm=148.789, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.729 +[gpub001:0/64] 2023-07-15 21:05:48,957 (trainer:732) INFO: 55epoch:train:3501-3600batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=75.185, loss_att=50.223, acc=0.733, loss=57.711, backward_time=1.031, grad_norm=142.225, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 21:08:05,090 (trainer:732) INFO: 55epoch:train:3601-3700batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=72.633, loss_att=54.354, acc=0.712, loss=59.838, backward_time=1.029, grad_norm=156.626, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 21:10:21,444 (trainer:732) INFO: 55epoch:train:3701-3800batch: iter_time=1.204e-04, forward_time=0.147, loss_ctc=69.561, loss_att=51.973, acc=0.725, loss=57.249, backward_time=1.030, grad_norm=129.593, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 21:12:37,572 (trainer:732) INFO: 55epoch:train:3801-3900batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=64.566, loss_att=47.497, acc=0.722, loss=52.618, backward_time=1.027, grad_norm=133.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 21:14:53,769 (trainer:732) INFO: 55epoch:train:3901-4000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=67.393, loss_att=53.835, acc=0.726, loss=57.902, backward_time=1.029, grad_norm=140.927, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 21:17:09,543 (trainer:732) INFO: 55epoch:train:4001-4100batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=65.438, loss_att=51.648, acc=0.716, loss=55.785, backward_time=1.027, grad_norm=112.494, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.838e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 21:18:46,330 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 21:19:04,591 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 21:19:08,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 21:23:46,402 (trainer:732) INFO: 55epoch:train:4101-4200batch: iter_time=1.571, forward_time=0.171, loss_ctc=69.400, loss_att=56.671, acc=0.707, loss=60.489, backward_time=1.040, grad_norm=139.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.838e-05, train_time=7.937 +[gpub001:0/64] 2023-07-15 21:26:03,353 (trainer:732) INFO: 55epoch:train:4201-4300batch: iter_time=1.202e-04, forward_time=0.146, loss_ctc=71.370, loss_att=51.067, acc=0.719, loss=57.158, backward_time=1.033, grad_norm=124.956, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.739 +[gpub001:0/64] 2023-07-15 21:28:19,368 (trainer:732) INFO: 55epoch:train:4301-4400batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.788, loss_att=55.523, acc=0.716, loss=60.102, backward_time=1.030, grad_norm=145.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 21:30:35,376 (trainer:732) INFO: 55epoch:train:4401-4500batch: iter_time=1.251e-04, 
forward_time=0.146, loss_ctc=73.170, loss_att=49.309, acc=0.727, loss=56.467, backward_time=1.030, grad_norm=177.689, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 21:32:57,674 (trainer:732) INFO: 55epoch:train:4501-4600batch: iter_time=1.231e-04, forward_time=0.170, loss_ctc=71.775, loss_att=55.086, acc=0.701, loss=60.093, backward_time=1.065, grad_norm=138.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.836e-05, train_time=2.845 +[gpub001:0/64] 2023-07-15 21:35:18,977 (trainer:732) INFO: 55epoch:train:4601-4700batch: iter_time=1.292e-04, forward_time=0.163, loss_ctc=68.115, loss_att=52.067, acc=0.705, loss=56.882, backward_time=1.030, grad_norm=146.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.826 +[gpub001:0/64] 2023-07-15 21:37:37,938 (trainer:732) INFO: 55epoch:train:4701-4800batch: iter_time=1.370e-04, forward_time=0.146, loss_ctc=67.563, loss_att=48.505, acc=0.725, loss=54.222, backward_time=1.041, grad_norm=114.887, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.779 +[gpub001:0/64] 2023-07-15 21:39:53,753 (trainer:732) INFO: 55epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.293, loss_att=55.110, acc=0.720, loss=59.365, backward_time=1.028, grad_norm=141.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 21:42:09,547 (trainer:732) INFO: 55epoch:train:4901-5000batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=62.874, loss_att=48.554, acc=0.716, loss=52.850, backward_time=1.029, grad_norm=127.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.834e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 21:42:13,217 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/64] 2023-07-15 21:42:31,219 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 21:42:34,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:48:26,273 (trainer:732) INFO: 55epoch:train:5001-5100batch: iter_time=1.355, forward_time=0.182, loss_ctc=63.212, loss_att=44.533, acc=0.732, loss=50.136, backward_time=1.043, grad_norm=148.605, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=7.534
+[gpub001:0/64] 2023-07-15 21:50:42,538 (trainer:732) INFO: 55epoch:train:5101-5200batch: iter_time=9.720e-05, forward_time=0.144, loss_ctc=75.917, loss_att=55.962, acc=0.723, loss=61.949, backward_time=1.030, grad_norm=125.059, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.833e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 21:52:58,854 (trainer:732) INFO: 55epoch:train:5201-5300batch: iter_time=8.952e-05, forward_time=0.144, loss_ctc=68.540, loss_att=46.952, acc=0.732, loss=53.429, backward_time=1.030, grad_norm=136.243, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.833e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 21:55:19,419 (trainer:732) INFO: 55epoch:train:5301-5400batch: iter_time=8.648e-05, forward_time=0.144, loss_ctc=72.817, loss_att=54.724, acc=0.714, loss=60.152, backward_time=1.034, grad_norm=142.694, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.811
+[gpub001:0/64] 2023-07-15 21:57:36,018 (trainer:732) INFO: 55epoch:train:5401-5500batch: iter_time=9.088e-05, forward_time=0.144, loss_ctc=68.000, loss_att=50.279, acc=0.722, loss=55.595, backward_time=1.030, grad_norm=120.000, clip=100.000, loss_scale=2.888e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 21:59:52,599 (trainer:732) INFO: 55epoch:train:5501-5600batch: iter_time=9.922e-05, forward_time=0.145, loss_ctc=70.562, loss_att=53.030, acc=0.727, loss=58.290, backward_time=1.031, grad_norm=151.714, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:02:08,924 (trainer:732) INFO: 55epoch:train:5601-5700batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=67.520, loss_att=56.170, acc=0.720, loss=59.575, backward_time=1.030, grad_norm=179.978, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 22:04:27,557 (trainer:732) INFO: 55epoch:train:5701-5800batch: iter_time=9.311e-05, forward_time=0.144, loss_ctc=60.189, loss_att=46.927, acc=0.714, loss=50.906, backward_time=1.031, grad_norm=118.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.831e-05, train_time=2.772
+[gpub001:0/64] 2023-07-15 22:05:31,121 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 22:05:48,967 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 22:05:52,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:12:10,087 (trainer:732) INFO: 55epoch:train:5801-5900batch: iter_time=3.061, forward_time=0.185, loss_ctc=75.455, loss_att=58.296, acc=0.718, loss=63.444, backward_time=1.105, grad_norm=123.117, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.830e-05, train_time=9.250
+[gpub001:0/64] 2023-07-15 22:14:27,175 (trainer:732) INFO: 55epoch:train:5901-6000batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=68.118, loss_att=46.658, acc=0.734, loss=53.096, backward_time=1.031, grad_norm=177.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.830e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 22:16:43,727 (trainer:732) INFO: 55epoch:train:6001-6100batch: iter_time=1.079e-04, forward_time=0.146, loss_ctc=79.566, loss_att=57.355, acc=0.725, loss=64.019, backward_time=1.031, grad_norm=166.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:18:59,637 (trainer:732) INFO: 55epoch:train:6101-6200batch: iter_time=1.009e-04, forward_time=0.144, loss_ctc=69.002, loss_att=52.731, acc=0.722, loss=57.612, backward_time=1.029, grad_norm=119.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 22:21:16,210 (trainer:732) INFO: 55epoch:train:6201-6300batch: iter_time=1.116e-04, forward_time=0.145, loss_ctc=68.288, loss_att=50.084, acc=0.721, loss=55.545, backward_time=1.030, grad_norm=139.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:23:44,539 (trainer:732) INFO: 55epoch:train:6301-6400batch: iter_time=1.056e-04, forward_time=0.144, loss_ctc=67.344, loss_att=48.425, acc=0.728, loss=54.101, backward_time=1.045, grad_norm=133.354, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.966
+[gpub001:0/64] 2023-07-15 22:26:07,503 (trainer:732) INFO: 55epoch:train:6401-6500batch: iter_time=1.176e-04, forward_time=0.152, loss_ctc=72.129, loss_att=56.674, acc=0.724, loss=61.311, backward_time=1.051, grad_norm=128.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.827e-05, train_time=2.859
+[gpub001:0/64] 2023-07-15 22:28:29,981 (trainer:732) INFO: 55epoch:train:6501-6600batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=62.301, loss_att=49.950, acc=0.726, loss=53.655, backward_time=1.040, grad_norm=125.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.827e-05, train_time=2.849
+[gpub001:0/64] 2023-07-15 22:30:06,709 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 22:30:25,064 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 22:30:28,532 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:36:04,422 (trainer:732) INFO: 55epoch:train:6601-6700batch: iter_time=1.567, forward_time=0.148, loss_ctc=66.536, loss_att=49.542, acc=0.714, loss=54.640, backward_time=1.031, grad_norm=147.774, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.827e-05, train_time=9.089
+[gpub001:0/64] 2023-07-15 22:38:24,208 (trainer:732) INFO: 55epoch:train:6701-6800batch: iter_time=1.030e-04, forward_time=0.167, loss_ctc=70.272, loss_att=49.751, acc=0.724, loss=55.907, backward_time=1.037, grad_norm=112.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.826e-05, train_time=2.796
+[gpub001:0/64] 2023-07-15 22:40:41,823 (trainer:732) INFO: 55epoch:train:6801-6900batch: iter_time=9.641e-05, forward_time=0.146, loss_ctc=70.101, loss_att=53.067, acc=0.720, loss=58.177, backward_time=1.032, grad_norm=122.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.826e-05, train_time=2.752
+[gpub001:0/64] 2023-07-15 22:42:59,328 (trainer:732) INFO: 55epoch:train:6901-7000batch: iter_time=9.366e-05, forward_time=0.145, loss_ctc=72.478, loss_att=49.337, acc=0.730, loss=56.279, backward_time=1.029, grad_norm=129.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.825e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 22:45:16,265 (trainer:732) INFO: 55epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.021, loss_att=55.230, acc=0.700, loss=60.268, backward_time=1.029, grad_norm=126.971, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.825e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 22:47:32,473 (trainer:732) INFO: 55epoch:train:7101-7200batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=67.237, loss_att=51.993, acc=0.709, loss=56.566, backward_time=1.029, grad_norm=119.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 22:49:48,357 (trainer:732) INFO: 55epoch:train:7201-7300batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=67.345, loss_att=47.923, acc=0.726, loss=53.750, backward_time=1.029, grad_norm=118.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 22:52:04,142 (trainer:732) INFO: 55epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=68.462, loss_att=53.569, acc=0.725, loss=58.037, backward_time=1.028, grad_norm=138.314, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.823e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 22:54:24,008 (trainer:732) INFO: 55epoch:train:7401-7500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.726, loss_att=49.352, acc=0.717, loss=53.664, backward_time=1.035, grad_norm=118.056, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.823e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 22:54:29,095 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 22:54:47,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 22:54:50,606 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:59:53,940 (trainer:732) INFO: 55epoch:train:7501-7600batch: iter_time=1.342, forward_time=0.179, loss_ctc=75.400, loss_att=51.040, acc=0.730, loss=58.348, backward_time=1.045, grad_norm=136.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=6.598
+[gpub001:0/64] 2023-07-15 23:02:10,577 (trainer:732) INFO: 55epoch:train:7601-7700batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=69.202, loss_att=52.650, acc=0.727, loss=57.616, backward_time=1.028, grad_norm=137.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 23:04:27,339 (trainer:732) INFO: 55epoch:train:7701-7800batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=75.729, loss_att=50.295, acc=0.738, loss=57.925, backward_time=1.033, grad_norm=129.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:06:43,267 (trainer:732) INFO: 55epoch:train:7801-7900batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=68.754, loss_att=54.027, acc=0.720, loss=58.445, backward_time=1.030, grad_norm=130.834, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 23:09:14,681 (trainer:732) INFO: 55epoch:train:7901-8000batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=65.383, loss_att=49.653, acc=0.720, loss=54.372, backward_time=1.082, grad_norm=143.167, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=3.028
+[gpub001:0/64] 2023-07-15 23:11:49,953 (trainer:732) INFO: 55epoch:train:8001-8100batch: iter_time=1.179e-04, forward_time=0.148, loss_ctc=65.409, loss_att=44.547, acc=0.734, loss=50.806, backward_time=1.053, grad_norm=133.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=3.105
+[gpub001:0/64] 2023-07-15 23:14:06,705 (trainer:732) INFO: 55epoch:train:8101-8200batch: iter_time=1.099e-04, forward_time=0.148, loss_ctc=71.658, loss_att=56.942, acc=0.730, loss=61.357, backward_time=1.035, grad_norm=138.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:16:22,766 (trainer:732) INFO: 55epoch:train:8201-8300batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=60.074, loss_att=47.491, acc=0.727, loss=51.266, backward_time=1.030, grad_norm=119.828, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 23:17:21,807 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 23:17:40,310 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 23:17:44,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:22:06,581 (trainer:732) INFO: 55epoch:train:8301-8400batch: iter_time=1.931, forward_time=0.162, loss_ctc=70.513, loss_att=50.076, acc=0.728, loss=56.207, backward_time=1.053, grad_norm=120.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=6.876
+[gpub001:0/64] 2023-07-15 23:24:24,068 (trainer:732) INFO: 55epoch:train:8401-8500batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=66.708, loss_att=47.369, acc=0.724, loss=53.171, backward_time=1.031, grad_norm=116.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 23:26:42,851 (trainer:732) INFO: 55epoch:train:8501-8600batch: iter_time=1.086e-04, forward_time=0.167, loss_ctc=78.244, loss_att=58.054, acc=0.714, loss=64.111, backward_time=1.031, grad_norm=173.126, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.775
+[gpub001:0/64] 2023-07-15 23:29:02,728 (trainer:732) INFO: 55epoch:train:8601-8700batch: iter_time=1.328e-04, forward_time=0.155, loss_ctc=69.989, loss_att=54.707, acc=0.715, loss=59.291, backward_time=1.031, grad_norm=149.360, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.818e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 23:31:26,010 (trainer:732) INFO: 55epoch:train:8701-8800batch: iter_time=1.131e-04, forward_time=0.196, loss_ctc=67.734, loss_att=49.675, acc=0.708, loss=55.093, backward_time=1.032, grad_norm=151.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.199, optim0_lr0=4.817e-05, train_time=2.865
+[gpub001:0/64] 2023-07-15 23:33:45,012 (trainer:732) INFO: 55epoch:train:8801-8900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=66.717, loss_att=48.204, acc=0.719, loss=53.758, backward_time=1.032, grad_norm=131.081, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.817e-05, train_time=2.780
+[gpub001:0/64] 2023-07-15 23:36:00,960 (trainer:732) INFO: 55epoch:train:8901-9000batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=70.848, loss_att=55.620, acc=0.725, loss=60.188, backward_time=1.029, grad_norm=143.176, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.816e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 23:38:18,241 (trainer:732) INFO: 55epoch:train:9001-9100batch: iter_time=1.011e-04, forward_time=0.150, loss_ctc=62.371, loss_att=49.098, acc=0.726, loss=53.080, backward_time=1.032, grad_norm=147.200, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.816e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 23:40:07,256 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 23:40:25,764 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 23:40:29,564 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:45:21,086 (trainer:732) INFO: 55epoch:train:9101-9200batch: iter_time=2.793, forward_time=0.191, loss_ctc=66.256, loss_att=48.196, acc=0.723, loss=53.614, backward_time=1.044, grad_norm=112.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.815e-05, train_time=8.457
+[gpub001:0/64] 2023-07-15 23:47:43,332 (trainer:732) INFO: 55epoch:train:9201-9300batch: iter_time=9.859e-05, forward_time=0.145, loss_ctc=70.085, loss_att=49.743, acc=0.725, loss=55.846, backward_time=1.042, grad_norm=152.031, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.815e-05, train_time=2.845
+[gpub001:0/64] 2023-07-15 23:50:02,620 (trainer:732) INFO: 55epoch:train:9301-9400batch: iter_time=9.019e-05, forward_time=0.145, loss_ctc=69.933, loss_att=52.729, acc=0.721, loss=57.890, backward_time=1.040, grad_norm=139.705, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.786
+[gpub001:0/64] 2023-07-15 23:52:24,700 (trainer:732) INFO: 55epoch:train:9401-9500batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=72.083, loss_att=48.816, acc=0.733, loss=55.796, backward_time=1.032, grad_norm=144.424, clip=100.000, loss_scale=5.776e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.841
+[gpub001:0/64] 2023-07-15 23:54:49,710 (trainer:732) INFO: 55epoch:train:9501-9600batch: iter_time=1.283e-04, forward_time=0.196, loss_ctc=70.036, loss_att=53.272, acc=0.706, loss=58.301, backward_time=1.036, grad_norm=201.540, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=4.814e-05, train_time=2.899
+[gpub001:0/64] 2023-07-15 23:57:14,693 (trainer:732) INFO: 55epoch:train:9601-9700batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=67.979, loss_att=52.018, acc=0.708, loss=56.807, backward_time=1.042, grad_norm=195.141, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.813e-05, train_time=2.900
+[gpub001:0/64] 2023-07-15 23:59:36,152 (trainer:732) INFO: 55epoch:train:9701-9800batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=68.810, loss_att=49.128, acc=0.729, loss=55.032, backward_time=1.052, grad_norm=143.908, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.813e-05, train_time=2.829
+[gpub001:0/64] 2023-07-16 00:02:00,185 (trainer:732) INFO: 55epoch:train:9801-9900batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=66.852, loss_att=52.866, acc=0.723, loss=57.062, backward_time=1.040, grad_norm=130.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.881
+[gpub001:0/64] 2023-07-16 00:04:16,085 (trainer:732) INFO: 55epoch:train:9901-10000batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=62.767, loss_att=47.965, acc=0.721, loss=52.406, backward_time=1.029, grad_norm=113.224, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.718
+[gpub001:0/64] 2023-07-16 00:16:37,923 (trainer:338) INFO: 55epoch results: [train] iter_time=0.244, forward_time=0.152, loss_ctc=69.296, loss_att=51.744, acc=0.719, loss=57.010, backward_time=1.035, grad_norm=137.602, clip=100.000, loss_scale=2.636e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=3.371, time=4 hours, 41 minutes and 10.78 seconds, total_count=520000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.158, cer_ctc=0.245, loss_att=35.877, acc=0.700, cer=0.361, wer=0.989, loss=37.761, time=6 minutes and 6.77 seconds, total_count=53130, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 0.96 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-16 00:16:57,201 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-16 00:16:57,394 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/38epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/50epoch.pth
+[gpub001:0/64] 2023-07-16 00:16:57,395 (trainer:272) INFO: 56/60epoch started. Estimated time to finish: 1 day, 49 minutes and 46.32 seconds
+[gpub001:0/64] 2023-07-16 00:16:59,197 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-16 00:17:17,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-16 00:17:22,740 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-16 00:25:03,887 (trainer:732) INFO: 56epoch:train:1-100batch: iter_time=3.403, forward_time=0.191, loss_ctc=67.004, loss_att=52.989, acc=0.697, loss=57.193, backward_time=1.047, grad_norm=134.202, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=4.811e-05, train_time=9.711
+[gpub001:0/64] 2023-07-16 00:26:04,353 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-16 00:27:28,104 (trainer:732) INFO: 56epoch:train:101-200batch: iter_time=9.493e-05, forward_time=0.145, loss_ctc=68.220, loss_att=50.035, acc=0.712, loss=55.491, backward_time=1.051, grad_norm=141.648, clip=100.000, loss_scale=4.570e+32, optim_step_time=0.182, optim0_lr0=4.811e-05, train_time=2.884
+[gpub001:0/64] 2023-07-16 00:29:45,320 (trainer:732) INFO: 56epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.143, loss_ctc=82.452, loss_att=59.724, acc=0.705, loss=66.542, backward_time=1.029, grad_norm=146.750, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:32:02,788 (trainer:732) INFO: 56epoch:train:301-400batch: iter_time=9.678e-05, forward_time=0.144, loss_ctc=72.797, loss_att=51.224, acc=0.713, loss=57.695, backward_time=1.028, grad_norm=135.271, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.749
+[gpub001:0/64] 2023-07-16 00:34:19,996 (trainer:732) INFO: 56epoch:train:401-500batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=64.462, loss_att=47.713, acc=0.715, loss=52.738, backward_time=1.028, grad_norm=123.919, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:36:48,635 (trainer:732) INFO: 56epoch:train:501-600batch: iter_time=2.047e-04, forward_time=0.231, loss_ctc=68.197, loss_att=52.344, acc=0.718, loss=57.100, backward_time=1.043, grad_norm=143.676, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.972
+[gpub001:0/64] 2023-07-16 00:39:11,940 (trainer:732) INFO: 56epoch:train:601-700batch: iter_time=7.600e-04, forward_time=0.198, loss_ctc=77.810, loss_att=56.459, acc=0.705, loss=62.865, backward_time=1.034, grad_norm=136.066, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.866
+[gpub001:0/64] 2023-07-16 00:41:30,733 (trainer:732) INFO: 56epoch:train:701-800batch: iter_time=9.654e-05, forward_time=0.146, loss_ctc=68.615, loss_att=51.269, acc=0.712, loss=56.472, backward_time=1.030, grad_norm=132.539, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.808e-05, train_time=2.776
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2157595.0 ON gpub001 CANCELLED AT 2023-07-16T00:41:51 ***