diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log" @@ -0,0 +1,4663 @@ +# Running on gpub002.delta.ncsa.illinois.edu +# Started at Wed Jul 12 13:15:16 CDT 2023 +# SLURMD_NODENAME=gpub002 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2147805 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2147805 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2108111 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub002 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1
+[gpub002:0/64] 2023-07-12 13:18:48,677 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub002:0/64] 2023-07-12 13:18:49,830 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub002:0/64] 2023-07-12 13:18:49,864 (s2t:483) INFO: Vocabulary size: 50002
+[gpub002:0/64] 2023-07-12 13:19:04,645 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub002:0/64] 2023-07-12 13:19:04,667 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub002:0/64] 2023-07-12 13:19:05,366 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub002:0/64] 2023-07-12 13:19:13,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:14,194 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub002:0/64] 2023-07-12 13:19:14,680 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub002:0/64] 2023-07-12 13:19:42,133 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub002:2108199:2108199 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108199:2108199 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108199:2108199 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub002:0/64] 2023-07-12 13:19:47,191 (trainer:284) INFO: 40/50epoch started
+[gpub002:0/64] 2023-07-12 13:19:47,237 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-12 13:20:04,995 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-12 13:20:08,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub011:1718215:1718215 [0] NCCL INFO cudaDriverVersion 12010
+gpub011:1718215:1718215 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0>
+gpub011:1718215:1718215 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub011:1718215:1718290 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0>
+gpub011:1718215:1718290 [0] NCCL INFO Using network IB
+gpub011:1718215:1718290 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub011:1718215:1718290 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub011:1718215:1718290 [0] NCCL INFO Connected all rings
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub011:1718215:1718290 [0] NCCL INFO Connected all trees
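The repeated EncoderLayer/DecoderLayer blocks dumped above are standard Transformer blocks. A minimal PyTorch sketch of one encoder block, assuming the dimensions shown in the dump (d_model=1024, d_ff=4096, dropout 0.1, LayerNorm eps 1e-12); the head count and the pre-LayerNorm residual order are assumptions not visible in the repr, and nn.MultiheadAttention stands in for ESPnet's MultiHeadedAttention (same parameter count, fused q/k/v projections):

    import torch
    import torch.nn as nn

    class EncoderLayerSketch(nn.Module):
        # One of the 24 encoder blocks above: self-attention followed by a
        # position-wise feed-forward net, each with LayerNorm, dropout, and
        # a residual connection.
        def __init__(self, d_model=1024, d_ff=4096, n_head=16, p=0.1):
            super().__init__()
            self.self_attn = nn.MultiheadAttention(d_model, n_head,
                                                   dropout=p, batch_first=True)
            self.feed_forward = nn.Sequential(
                nn.Linear(d_model, d_ff),   # (w_1) in the dump
                nn.ReLU(),                  # (activation)
                nn.Dropout(p),              # (dropout)
                nn.Linear(d_ff, d_model),   # (w_2)
            )
            self.norm1 = nn.LayerNorm(d_model, eps=1e-12)
            self.norm2 = nn.LayerNorm(d_model, eps=1e-12)
            self.dropout = nn.Dropout(p)

        def forward(self, x, pad_mask=None):
            y = self.norm1(x)
            y, _ = self.self_attn(y, y, y, key_padding_mask=pad_mask)
            x = x + self.dropout(y)
            y = self.feed_forward(self.norm2(x))
            return x + self.dropout(y)

The 24 DecoderLayer blocks differ only by an extra src_attn cross-attention over the encoder output and a third LayerNorm (norm3).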
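The 888.51 M parameter figure and 3.55 GB size in the model summary can be sanity-checked from the dumped shapes alone. A back-of-the-envelope in Python; the ~29 M not accounted for below presumably sits in the Conv2d frontend and the few small modules that appear earlier in the dump rather than in this excerpt:

    d, ff, V = 1024, 4096, 50002

    lin = lambda i, o: i * o + o              # Linear with bias
    attn = 4 * lin(d, d)                      # linear_q/k/v/out
    ffn = lin(d, ff) + lin(ff, d)             # w_1 + w_2
    norm = 2 * d                              # LayerNorm weight + bias

    enc_layer = attn + ffn + 2 * norm         # self_attn, feed_forward, norm1/2
    dec_layer = 2 * attn + ffn + 3 * norm     # adds src_attn and norm3

    total = (24 * enc_layer + 24 * dec_layer  # the e24/d24 stacks
             + V * d                          # decoder Embedding(50002, 1024)
             + lin(d, V)                      # decoder output_layer
             + lin(d, V))                     # ctc_lo
    print(f"{total / 1e6:.2f} M")             # ~859.14 M; + ~29 M frontend => 888.51 M
    print(f"{888.51e6 * 4 / 1e9:.2f} GB")     # float32, 4 bytes/param => 3.55 GB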
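The optimizer dump reports lr: 2.5e-08 even though initial_lr is 0.00025; that is expected at step 1 of WarmupLR(warmup_steps=10000). A short sketch of the schedule, following ESPnet's documented WarmupLR formula (Noam-style: linear warmup to initial_lr at warmup_steps, then inverse-square-root decay):

    def warmup_lr(step, base_lr=2.5e-4, warmup=10000):
        # lr = base_lr * warmup^0.5 * min(step^-0.5, step * warmup^-1.5)
        return base_lr * warmup ** 0.5 * min(step ** -0.5, step * warmup ** -1.5)

    print(warmup_lr(1))        # 2.5e-08 -- matches the "lr" field in the dump above
    print(warmup_lr(10000))    # 2.5e-04 -- peak equals initial_lr at warmup_steps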
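The [valid] and [plot_att] sampler lines are mutually consistent: the same key file yields 129,591 dev utterances, packed into 1,012 batches at batch_size=128 and into single-utterance batches at batch_size=1 (of which, per the summary line, only 3 are actually used for attention plotting). A quick check, assuming the remainder utterances are folded into existing batches, consistent with min=128, max=129:

    n_utts = 129591                 # N-batch at batch_size=1 in the [plot_att] line
    bs = 128
    n_batches = n_utts // bs        # 1012 -- matches the [valid] line
    print(n_batches, round(n_utts / n_batches, 1))   # 1012 128.1 -- the logged mean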
+gpub011:1718215:1718290 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718215:1718290 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718215:1718290 [0] NCCL INFO comm 0x8e227720 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub011:1718216:1718216 [1] NCCL INFO cudaDriverVersion 12010 +gpub011:1718216:1718216 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718216:1718216 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718216:1718288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718216:1718288 [1] NCCL INFO Using network IB +gpub011:1718216:1718288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:1718216:1718288 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all rings +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all trees +gpub011:1718216:1718288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718216:1718288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718216:1718288 [1] NCCL INFO comm 0x9d351fa0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1718218:1718218 [3] NCCL INFO cudaDriverVersion 12010 +gpub011:1718218:1718218 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718218:1718218 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718218:1718289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718218:1718289 [3] NCCL INFO Using network IB +gpub011:1718218:1718289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub011:1718218:1718289 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Connected all rings +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Connected all trees +gpub011:1718218:1718289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718218:1718289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718218:1718289 [3] NCCL INFO comm 0x4fae7090 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub084:95632:95632 [3] NCCL INFO cudaDriverVersion 12010 +gpub084:95632:95632 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95632:95632 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95632:95714 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> 
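The Trees lines above encode NCCL's double binary tree: each "[i] c0/c1/c2->rank->parent" block lists up to three children (-1 meaning none), the local rank, and its parent in tree i. A tiny parser for that line format, inferred from these logs (NCCL does not document the format as stable):

    import re

    line = "NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28"

    for tree, kids, rank, parent in re.findall(
            r"\[(\d)\] ([-\d/]+)->(\d+)->(-?\d+)", line):
        children = [int(c) for c in kids.split("/") if int(c) >= 0]
        print(f"tree {tree}: rank {rank} -> parent {parent}, children {children}")
    # tree 0: rank 12 -> parent 8,  children [13]
    # tree 1: rank 12 -> parent 28, children [13, 4]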
+gpub084:95632:95714 [3] NCCL INFO Using network IB +gpub084:95632:95714 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub084:95632:95714 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Connected all rings +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Connected all trees +gpub084:95632:95714 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95632:95714 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95632:95714 [3] NCCL INFO comm 0x9d28050 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:1718217:1718217 [2] NCCL INFO cudaDriverVersion 12010 +gpub011:1718217:1718217 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718217:1718217 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718217:1718291 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718217:1718291 [2] NCCL INFO Using network IB +gpub011:1718217:1718291 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:1718217:1718291 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all rings +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all trees +gpub011:1718217:1718291 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718217:1718291 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718217:1718291 [2] NCCL INFO comm 0x50a009a0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub019:2611991:2611991 [2] NCCL INFO cudaDriverVersion 12010 +gpub019:2611991:2611991 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611991:2611991 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611991:2612065 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611991:2612065 [2] NCCL INFO Using network IB +gpub019:2611991:2612065 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub019:2611991:2612065 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all rings +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all trees +gpub019:2611991:2612065 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611991:2612065 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
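Because the per-rank NCCL init blocks from the 16 nodes arrive interleaved, the easiest audit is mechanical: scan for the "Init COMPLETE" comm lines and confirm all 64 ranks appear. A small sketch (the train.2.log filename refers to this log file; the regex matches the comm lines shown above):

    import re

    # Target lines look like:
    #   gpub011:... [0] NCCL INFO comm 0x8e227720 rank 12 nranks 64 ... Init COMPLETE
    pat = re.compile(r"rank (\d+) nranks (\d+).*Init COMPLETE")

    ranks, nranks = set(), 0
    with open("train.2.log") as f:
        for line in f:
            if m := pat.search(line):
                ranks.add(int(m.group(1)))
                nranks = int(m.group(2))

    missing = set(range(nranks)) - ranks
    print(f"{len(ranks)}/{nranks} ranks initialized; missing: {sorted(missing)}")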
+gpub019:2611991:2612065 [2] NCCL INFO comm 0x10048ab0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:95631:95631 [2] NCCL INFO cudaDriverVersion 12010 +gpub084:95631:95631 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95631:95631 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95631:95712 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95631:95712 [2] NCCL INFO Using network IB +gpub084:95631:95712 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub084:95631:95712 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all rings +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all trees +gpub084:95631:95712 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95631:95712 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95631:95712 [2] NCCL INFO comm 0x940c750 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:3855653:3855653 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:3855653:3855653 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855653:3855653 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855653:3855727 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855653:3855727 [1] NCCL INFO Using network IB +gpub074:3855653:3855727 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:3855653:3855727 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all rings +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all trees +gpub074:3855653:3855727 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855653:3855727 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855653:3855727 [1] NCCL INFO comm 0xaa1acf00 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub074:3855655:3855655 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:3855655:3855655 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855655:3855655 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855655:3855725 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855655:3855725 [3] NCCL INFO Using network IB +gpub074:3855655:3855725 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:3855655:3855725 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub074:3855655:3855725 [3] NCCL 
INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Connected all rings +gpub074:3855655:3855725 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Connected all trees +gpub074:3855655:3855725 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855655:3855725 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855655:3855725 [3] NCCL INFO comm 0x509a28d0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3855652:3855652 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:3855652:3855652 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855652:3855652 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855652:3855726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855652:3855726 [0] NCCL INFO Using network IB +gpub074:3855652:3855726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:3855652:3855726 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Connected all rings +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Connected all trees +gpub074:3855652:3855726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855652:3855726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855652:3855726 [0] NCCL INFO comm 0x8e164a10 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611989:2611989 [0] NCCL INFO cudaDriverVersion 12010 +gpub019:2611989:2611989 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611989:2611989 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611989:2612066 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611989:2612066 [0] NCCL INFO Using network IB +gpub019:2611989:2612066 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub019:2611989:2612066 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 
16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Connected all rings +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Connected all trees +gpub019:2611989:2612066 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611989:2612066 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611989:2612066 [0] NCCL INFO comm 0xa8ee89f0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611992:2611992 [3] NCCL INFO cudaDriverVersion 12010 +gpub019:2611992:2611992 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611992:2611992 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611992:2612064 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611992:2612064 [3] NCCL INFO Using network IB +gpub019:2611992:2612064 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub019:2611992:2612064 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Connected all rings +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Connected all trees +gpub019:2611992:2612064 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611992:2612064 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611992:2612064 [3] NCCL INFO comm 0x4fcf2500 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub028:3104067:3104067 [0] NCCL INFO cudaDriverVersion 12010 +gpub028:3104067:3104067 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104067:3104067 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104067:3104152 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104067:3104152 [0] NCCL INFO Using network IB +gpub028:3104067:3104152 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub028:3104067:3104152 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Connected all rings +gpub008:2789793:2789793 [0] NCCL INFO cudaDriverVersion 
12010
+gpub008:2789793:2789793 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0>
+gpub008:2789793:2789793 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub008:2789793:2789871 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0>
+gpub008:2789793:2789871 [0] NCCL INFO Using network IB
+gpub008:2789793:2789871 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub008:2789793:2789871 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub008:2789793:2789871 [0] NCCL INFO Connected all rings
+gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub028:3104067:3104152 [0] NCCL INFO Connected all trees
+gpub028:3104067:3104152 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104067:3104152 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104067:3104152 [0] NCCL INFO comm 0xa17fea0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub008:2789793:2789871 [0] NCCL INFO Connected all trees
+gpub008:2789793:2789871 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub008:2789793:2789871 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub008:2789793:2789871 [0] NCCL INFO comm 0x9e41e050 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub010:1746407:1746407 [0] NCCL INFO cudaDriverVersion 12010
+gpub010:1746407:1746407 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746407:1746407 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746407:1746486 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746407:1746486 [0] NCCL INFO Using network IB
+gpub010:1746407:1746486 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub010:1746407:1746486 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub010:1746407:1746486 [0] NCCL INFO Connected all rings
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0
+gpub010:1746407:1746486 [0] NCCL INFO Connected all trees
+gpub010:1746407:1746486 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746407:1746486 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746407:1746486 [0] NCCL INFO comm 0xa1f0110 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub010:1746410:1746410 [3] NCCL INFO cudaDriverVersion 12010
+gpub010:1746410:1746410 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746410:1746410 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746410:1746485 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746410:1746485 [3] NCCL INFO Using network IB
+gpub010:1746410:1746485 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub010:1746410:1746485 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub010:1746410:1746485 [3] NCCL INFO Connected all rings
+gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub010:1746410:1746485 [3] NCCL INFO Connected all trees
+gpub010:1746410:1746485 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746410:1746485 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746410:1746485 [3] NCCL INFO comm 0x95b8eb50 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub027:3834396:3834396 [0] NCCL INFO cudaDriverVersion 12010
+gpub027:3834396:3834396 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834396:3834396 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834396:3834476 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834396:3834476 [0] NCCL INFO Using network IB
+gpub027:3834396:3834476 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub027:3834396:3834476 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub027:3834396:3834476 [0] NCCL INFO Connected all rings
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub027:3834396:3834476 [0] NCCL INFO Connected all trees
+gpub027:3834396:3834476 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834396:3834476 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834396:3834476 [0] NCCL INFO comm 0x8b8afd50 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub019:2611990:2611990 [1] NCCL INFO cudaDriverVersion 12010
+gpub019:2611990:2611990 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0>
+gpub019:2611990:2611990 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub019:2611990:2612063 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0>
+gpub019:2611990:2612063 [1] NCCL INFO Using network IB
+gpub019:2611990:2612063 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub019:2611990:2612063 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Connected all rings
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub019:2611990:2612063 [1] NCCL INFO Connected all trees
+gpub019:2611990:2612063 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub019:2611990:2612063 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub019:2611990:2612063 [1] NCCL INFO comm 0x8916a60 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2867869:2867869 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2867869:2867869 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867869:2867869 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867869:2867948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867869:2867948 [0] NCCL INFO Using network IB
+gpub030:2867869:2867948 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2867869:2867948 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub030:2867869:2867948 [0] NCCL INFO Connected all rings
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2867869:2867948 [0] NCCL INFO Connected all trees
+gpub030:2867869:2867948 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867869:2867948 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867869:2867948 [0] NCCL INFO comm 0x236c1590 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub074:3855654:3855654 [2] NCCL INFO cudaDriverVersion 12010
+gpub074:3855654:3855654 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:3855654:3855654 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:3855654:3855724 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:3855654:3855724 [2] NCCL INFO Using network IB
+gpub074:3855654:3855724 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub074:3855654:3855724 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Connected all rings
+gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub074:3855654:3855724 [2] NCCL INFO Connected all trees
+gpub074:3855654:3855724 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub074:3855654:3855724 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub074:3855654:3855724 [2] NCCL INFO comm 0xba937820 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub030:2867872:2867872 [3] NCCL INFO cudaDriverVersion 12010
+gpub030:2867872:2867872 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867872:2867872 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867872:2867950 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867872:2867950 [3] NCCL INFO Using network IB
+gpub030:2867872:2867950 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub030:2867872:2867950 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub030:2867872:2867950 [3] NCCL INFO Connected all rings
+gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub030:2867872:2867950 [3] NCCL INFO Connected all trees
+gpub030:2867872:2867950 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867872:2867950 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867872:2867950 [3] NCCL INFO comm 0x8db50450 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub078:387633:387633 [0] NCCL INFO cudaDriverVersion 12010
+gpub078:387633:387633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387633:387633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387633:387710 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387633:387710 [0] NCCL INFO Using network IB
+gpub078:387633:387710 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub078:387633:387710 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub078:387633:387710 [0] NCCL INFO Connected all rings
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub078:387633:387710 [0] NCCL INFO Connected all trees
+gpub078:387633:387710 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387633:387710 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387633:387710 [0] NCCL INFO comm 0x8b083970 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub030:2867870:2867870 [1] NCCL INFO cudaDriverVersion 12010
+gpub030:2867870:2867870 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867870:2867870 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867870:2867949 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867870:2867949 [1] NCCL INFO Using network IB
+gpub030:2867870:2867949 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub030:2867870:2867949 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Connected all rings
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub030:2867870:2867949 [1] NCCL INFO Connected all trees
+gpub030:2867870:2867949 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867870:2867949 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867870:2867949 [1] NCCL INFO comm 0x9c29c010 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub008:2789796:2789796 [3] NCCL INFO cudaDriverVersion 12010
+gpub008:2789796:2789796 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0>
+gpub008:2789796:2789796 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub008:2789796:2789872 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0>
+gpub008:2789796:2789872 [3] NCCL INFO Using network IB
+gpub008:2789796:2789872 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub008:2789796:2789872 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub008:2789796:2789872 [3] NCCL INFO Connected all rings
+gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub008:2789796:2789872 [3] NCCL INFO Connected all trees
+gpub008:2789796:2789872 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub008:2789796:2789872 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub008:2789796:2789872 [3] NCCL INFO comm 0x50597af0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub053:2037082:2037082 [0] NCCL INFO cudaDriverVersion 12010
+gpub053:2037082:2037082 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037082:2037082 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037082:2037160 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037082:2037160 [0] NCCL INFO Using network IB
+gpub053:2037082:2037160 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub053:2037082:2037160 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub053:2037082:2037160 [0] NCCL INFO Connected all rings
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub053:2037082:2037160 [0] NCCL INFO Connected all trees
+gpub053:2037082:2037160 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037082:2037160 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037082:2037160 [0] NCCL INFO comm 0x50aa6090 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub027:3834399:3834399 [3] NCCL INFO cudaDriverVersion 12010
+gpub027:3834399:3834399 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834399:3834399 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834399:3834474 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834399:3834474 [3] NCCL INFO Using network IB
+gpub027:3834399:3834474 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub027:3834399:3834474 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub027:3834399:3834474 [3] NCCL INFO Connected all rings
+gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub027:3834399:3834474 [3] NCCL INFO Connected all trees
+gpub027:3834399:3834474 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834399:3834474 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834399:3834474 [3] NCCL INFO comm 0x8f1f3890 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub053:2037083:2037083 [1] NCCL INFO cudaDriverVersion 12010
+gpub053:2037083:2037083 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037083:2037083 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037083:2037161 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037083:2037161 [1] NCCL INFO Using network IB
+gpub053:2037083:2037161 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub053:2037083:2037161 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Connected all rings
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub053:2037083:2037161 [1] NCCL INFO Connected all trees
+gpub053:2037083:2037161 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037083:2037161 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037083:2037161 [1] NCCL INFO comm 0x4f89c530 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub008:2789795:2789795 [2] NCCL INFO cudaDriverVersion 12010
+gpub008:2789795:2789795 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0>
+gpub008:2789795:2789795 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub008:2789795:2789874 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0>
+gpub008:2789795:2789874 [2] NCCL INFO Using network IB
+gpub008:2789795:2789874 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub008:2789795:2789874 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Connected all rings
+gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub008:2789795:2789874 [2] NCCL INFO Connected all trees
+gpub008:2789795:2789874 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub008:2789795:2789874 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub008:2789795:2789874 [2] NCCL INFO comm 0xb7cc7790 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub078:387636:387636 [3] NCCL INFO cudaDriverVersion 12010
+gpub078:387636:387636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387636:387636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387636:387711 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387636:387711 [3] NCCL INFO Using network IB
+gpub078:387636:387711 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub078:387636:387711 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub078:387636:387711 [3] NCCL INFO Connected all rings
+gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub078:387636:387711 [3] NCCL INFO Connected all trees
+gpub078:387636:387711 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387636:387711 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387636:387711 [3] NCCL INFO comm 0x50bf4280 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub073:748599:748599 [2] NCCL INFO cudaDriverVersion 12010
+gpub073:748599:748599 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748599:748599 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748599:748672 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748599:748672 [2] NCCL INFO Using network IB
+gpub073:748599:748672 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub073:748599:748672 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Connected all rings
+gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub073:748599:748672 [2] NCCL INFO Connected all trees
+gpub073:748599:748672 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub073:748599:748672 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub073:748599:748672 [2] NCCL INFO comm 0xa2d1650 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub053:2037084:2037084 [2] NCCL INFO cudaDriverVersion 12010
+gpub053:2037084:2037084 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037084:2037084 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037084:2037163 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037084:2037163 [2] NCCL INFO Using network IB
+gpub053:2037084:2037163 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub053:2037084:2037163 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Connected all rings
+gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub053:2037084:2037163 [2] NCCL INFO Connected all trees
+gpub053:2037084:2037163 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037084:2037163 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037084:2037163 [2] NCCL INFO comm 0x8c08e1a0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub078:387635:387635 [2] NCCL INFO cudaDriverVersion 12010
+gpub078:387635:387635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387635:387635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387635:387713 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387635:387713 [2] NCCL INFO Using network IB
+gpub078:387635:387713 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub078:387635:387713 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Connected all rings
+gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub078:387635:387713 [2] NCCL INFO Connected all trees
+gpub078:387635:387713 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387635:387713 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387635:387713 [2] NCCL INFO comm 0x9a633940 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub028:3104070:3104070 [3] NCCL INFO cudaDriverVersion 12010
+gpub028:3104070:3104070 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0>
+gpub028:3104070:3104070 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub028:3104070:3104149 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0>
+gpub028:3104070:3104149 [3] NCCL INFO Using network IB
+gpub028:3104070:3104149 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub028:3104070:3104149 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub028:3104070:3104149 [3] NCCL INFO Connected all rings
+gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub028:3104070:3104149 [3] NCCL INFO Connected all trees
+gpub028:3104070:3104149 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104070:3104149 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104070:3104149 [3] NCCL INFO comm 0xb81c6b50 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub053:2037085:2037085 [3] NCCL INFO cudaDriverVersion 12010
+gpub053:2037085:2037085 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:2037085:2037085 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:2037085:2037162 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:2037085:2037162 [3] NCCL INFO Using network IB
+gpub053:2037085:2037162 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub053:2037085:2037162 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub053:2037085:2037162 [3] NCCL INFO Connected all rings
+gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub053:2037085:2037162 [3] NCCL INFO Connected all trees
+gpub053:2037085:2037162 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:2037085:2037162 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:2037085:2037162 [3] NCCL INFO comm 0x5026aaa0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub050:2539553:2539553 [1] NCCL INFO cudaDriverVersion 12010
+gpub050:2539553:2539553 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2539553:2539553 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2539553:2539629 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2539553:2539629 [1] NCCL INFO Using network IB
+gpub050:2539553:2539629 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub050:2539553:2539629 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Connected all rings
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub050:2539553:2539629 [1] NCCL INFO Connected all trees
+gpub050:2539553:2539629 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2539553:2539629 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2539553:2539629 [1] NCCL INFO comm 0xa4859b0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub027:3834397:3834397 [1] NCCL INFO cudaDriverVersion 12010
+gpub027:3834397:3834397 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834397:3834397 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834397:3834475 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834397:3834475 [1] NCCL INFO Using network IB
+gpub027:3834397:3834475 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub027:3834397:3834475 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Connected all rings
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub027:3834397:3834475 [1] NCCL INFO Connected all trees
+gpub027:3834397:3834475 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834397:3834475 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834397:3834475 [1] NCCL INFO comm 0x8ed34290 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub028:3104068:3104068 [1] NCCL INFO cudaDriverVersion 12010
+gpub028:3104068:3104068 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0>
+gpub028:3104068:3104068 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub028:3104068:3104151 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0>
+gpub028:3104068:3104151 [1] NCCL INFO Using network IB
+gpub028:3104068:3104151 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub028:3104068:3104151 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Connected all rings
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub028:3104068:3104151 [1] NCCL INFO Connected all trees
+gpub028:3104068:3104151 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104068:3104151 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104068:3104151 [1] NCCL INFO comm 0xb8c85b80 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub052:2277064:2277064 [2] NCCL INFO cudaDriverVersion 12010
+gpub052:2277064:2277064 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:2277064:2277064 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:2277064:2277141 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:2277064:2277141 [2] NCCL INFO Using network IB
+gpub052:2277064:2277141 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub052:2277064:2277141 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Connected all rings
+gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub052:2277064:2277141 [2] NCCL INFO Connected all trees
+gpub052:2277064:2277141 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:2277064:2277141 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2277064:2277141 [2] NCCL INFO comm 0xa4d0c250 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub078:387634:387634 [1] NCCL INFO cudaDriverVersion 12010
+gpub078:387634:387634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0>
+gpub078:387634:387634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub078:387634:387712 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0>
+gpub078:387634:387712 [1] NCCL INFO Using network IB
+gpub078:387634:387712 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub078:387634:387712 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Connected all rings
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub078:387634:387712 [1] NCCL INFO Connected all trees
+gpub078:387634:387712 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub078:387634:387712 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub078:387634:387712 [1] NCCL INFO comm 0xb893bfd0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub051:3225329:3225329 [1] NCCL INFO cudaDriverVersion 12010
+gpub051:3225329:3225329 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0>
+gpub051:3225329:3225329 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub051:3225329:3225407 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0>
+gpub051:3225329:3225407 [1] NCCL INFO Using network IB
+gpub051:3225329:3225407 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub051:3225329:3225407 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Connected all rings
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpub084:95630:95630 [1] NCCL INFO cudaDriverVersion 12010
+gpub084:95630:95630 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:95630:95630 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:95630:95713 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:95630:95713 [1] NCCL INFO Using network IB
+gpub084:95630:95713 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub084:95630:95713 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Connected all rings
+gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub084:95630:95713 [1] NCCL INFO Connected all trees
+gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub051:3225329:3225407 [1] NCCL INFO Connected all trees
+gpub051:3225329:3225407 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub051:3225329:3225407 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub051:3225329:3225407 [1] NCCL INFO comm 0xa2b18990 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub084:95630:95713 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:95630:95713 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:95630:95713 [1] NCCL INFO comm 0x505266b0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2867871:2867871 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2867871:2867871 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2867871:2867871 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2867871:2867947 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2867871:2867947 [2] NCCL INFO Using network IB
+gpub030:2867871:2867947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2867871:2867947 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Connected all rings
+gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub002:2108202:2108202 [3] NCCL INFO cudaDriverVersion 12010
+gpub002:2108202:2108202 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108202:2108202 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108202:2108274 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:2108202:2108274 [3] NCCL INFO Using network IB
+gpub002:2108202:2108274 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub002:2108202:2108274 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub002:2108202:2108274 [3] NCCL INFO Connected all rings
+gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub030:2867871:2867947 [2] NCCL INFO Connected all trees
+gpub030:2867871:2867947 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2867871:2867947 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2867871:2867947 [2] NCCL INFO comm 0x516c8220 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub002:2108202:2108274 [3] NCCL INFO Connected all trees
+gpub002:2108202:2108274 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:2108202:2108274 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:2108202:2108274 [3] NCCL INFO comm 0xba66c350 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub028:3104069:3104069 [2] NCCL INFO cudaDriverVersion 12010
+gpub028:3104069:3104069 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0>
+gpub028:3104069:3104069 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub028:3104069:3104150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0>
+gpub028:3104069:3104150 [2] NCCL INFO Using network IB
+gpub028:3104069:3104150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub028:3104069:3104150 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Connected all rings
+gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub028:3104069:3104150 [2] NCCL INFO Connected all trees
+gpub028:3104069:3104150 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub028:3104069:3104150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub028:3104069:3104150 [2] NCCL INFO comm 0x50c3cd20 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:95629:95629 [0] NCCL INFO cudaDriverVersion 12010
+gpub084:95629:95629 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:95629:95629 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:95629:95715 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:95629:95715 [0] NCCL INFO Using network IB
+gpub084:95629:95715 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub084:95629:95715 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub084:95629:95715 [0] NCCL INFO Connected all rings
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub084:95629:95715 [0] NCCL INFO Connected all trees
+gpub084:95629:95715 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:95629:95715 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:95629:95715 [0] NCCL INFO comm 0x4f579950 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub050:2539555:2539555 [3] NCCL INFO cudaDriverVersion 12010
+gpub050:2539555:2539555 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2539555:2539555 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2539555:2539630 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2539555:2539630 [3] NCCL INFO Using network IB
+gpub050:2539555:2539630 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub050:2539555:2539630 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub050:2539555:2539630 [3] NCCL INFO Connected all rings
+gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub050:2539555:2539630 [3] NCCL INFO Connected all trees
+gpub050:2539555:2539630 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2539555:2539630 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2539555:2539630 [3] NCCL INFO comm 0xb939ca50 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub052:2277062:2277062 [0] NCCL INFO cudaDriverVersion 12010
+gpub052:2277062:2277062 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:2277062:2277062 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:2277062:2277138 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:2277062:2277138 [0] NCCL INFO Using network IB
+gpub052:2277062:2277138 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub052:2277062:2277138 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub052:2277062:2277138 [0] NCCL INFO Connected all rings
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub052:2277062:2277138 [0] NCCL INFO Connected all trees
+gpub052:2277062:2277138 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:2277062:2277138 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2277062:2277138 [0] NCCL INFO comm 0x8b3e450 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub010:1746408:1746408 [1] NCCL INFO cudaDriverVersion 12010
+gpub010:1746408:1746408 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746408:1746408 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746408:1746484 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746408:1746484 [1] NCCL INFO Using network IB
+gpub010:1746408:1746484 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub010:1746408:1746484 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Connected all rings
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0
+gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub010:1746408:1746484 [1] NCCL INFO Connected all trees
+gpub010:1746408:1746484 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746408:1746484 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746408:1746484 [1] NCCL INFO comm 0xab889a50 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub010:1746409:1746409 [2] NCCL INFO cudaDriverVersion 12010
+gpub010:1746409:1746409 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0>
+gpub010:1746409:1746409 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub010:1746409:1746487 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0>
+gpub010:1746409:1746487 [2] NCCL INFO Using network IB
+gpub010:1746409:1746487 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub010:1746409:1746487 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Connected all rings
+gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub010:1746409:1746487 [2] NCCL INFO Connected all trees
+gpub010:1746409:1746487 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub010:1746409:1746487 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub010:1746409:1746487 [2] NCCL INFO comm 0x8d5443e0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub002:2108200:2108200 [1] NCCL INFO cudaDriverVersion 12010
+gpub002:2108200:2108200 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108200:2108200 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108200:2108275 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:2108200:2108275 [1] NCCL INFO Using network IB
+gpub002:2108200:2108275 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub002:2108200:2108275 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Connected all rings
+gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub002:2108200:2108275 [1] NCCL INFO Connected all trees
+gpub002:2108200:2108275 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:2108200:2108275 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:2108200:2108275 [1] NCCL INFO comm 0x8e8ce8d0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub002:2108199:2108273 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:2108199:2108273 [0] NCCL INFO Using network IB
+gpub002:2108199:2108273 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpub002:2108199:2108273 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub002:2108199:2108273 [0] NCCL INFO Connected all rings
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0
+gpub002:2108199:2108273 [0] NCCL INFO Connected all trees
+gpub002:2108199:2108273 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:2108199:2108273 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:2108199:2108273 [0] NCCL INFO comm 0x8d0b120 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub073:748600:748600 [3] NCCL INFO cudaDriverVersion 12010
+gpub073:748600:748600 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748600:748600 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748600:748671 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748600:748671 [3] NCCL INFO Using network IB
+gpub073:748600:748671 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub073:748600:748671 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub073:748600:748671 [3] NCCL INFO Connected all rings
+gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub051:3225328:3225328 [0] NCCL INFO cudaDriverVersion 12010
+gpub051:3225328:3225328 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0>
+gpub051:3225328:3225328 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub051:3225328:3225405 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0>
+gpub051:3225328:3225405 [0] NCCL INFO Using network IB
+gpub051:3225328:3225405 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub051:3225328:3225405 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub051:3225328:3225405 [0] NCCL INFO Connected all rings
+gpub073:748600:748671 [3] NCCL INFO Connected all trees
+gpub073:748600:748671 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub073:748600:748671 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub073:748600:748671 [3] NCCL INFO comm 0x4f8ebf60 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpub051:3225328:3225405 [0] NCCL INFO Connected all trees
+gpub051:3225328:3225405 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub051:3225328:3225405 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub051:3225328:3225405 [0] NCCL INFO comm 0x4f680190 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub050:2539554:2539554 [2] NCCL INFO cudaDriverVersion 12010
+gpub050:2539554:2539554 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2539554:2539554 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2539554:2539627 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2539554:2539627 [2] NCCL INFO Using network IB
+gpub050:2539554:2539627 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub050:2539554:2539627 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Connected all rings
+gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub050:2539554:2539627 [2] NCCL INFO Connected all trees
+gpub050:2539554:2539627 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2539554:2539627 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2539554:2539627 [2] NCCL INFO comm 0xa469b710 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub027:3834398:3834398 [2] NCCL INFO cudaDriverVersion 12010
+gpub027:3834398:3834398 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0>
+gpub027:3834398:3834398 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub027:3834398:3834473 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0>
+gpub027:3834398:3834473 [2] NCCL INFO Using network IB
+gpub027:3834398:3834473 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub027:3834398:3834473 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Connected all rings
+gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub027:3834398:3834473 [2] NCCL INFO Connected all trees
+gpub027:3834398:3834473 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub027:3834398:3834473 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub027:3834398:3834473 [2] NCCL INFO comm 0x505e2640 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub073:748598:748598 [1] NCCL INFO cudaDriverVersion 12010
+gpub073:748598:748598 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748598:748598 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748598:748673 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748598:748673 [1] NCCL INFO Using network IB
+gpub073:748598:748673 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub073:748598:748673 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Connected all rings
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub073:748598:748673 [1] NCCL INFO Connected all trees
+gpub073:748598:748673 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub073:748598:748673 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub073:748598:748673 [1] NCCL INFO comm 0xb7883d00 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub073:748597:748597 [0] NCCL INFO cudaDriverVersion 12010
+gpub073:748597:748597 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0>
+gpub073:748597:748597 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub073:748597:748674 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0>
+gpub073:748597:748674 [0] NCCL INFO Using network IB
+gpub073:748597:748674 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub073:748597:748674 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub073:748597:748674 [0] NCCL INFO Connected all rings
+gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via 
NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Connected all trees +gpub073:748597:748674 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748597:748674 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748597:748674 [0] NCCL INFO comm 0xa03dfc0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539552:2539552 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2539552:2539552 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539552:2539552 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539552:2539628 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539552:2539628 [0] NCCL INFO Using network IB +gpub050:2539552:2539628 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2539552:2539628 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Connected all rings +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Connected all trees +gpub050:2539552:2539628 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539552:2539628 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539552:2539628 [0] NCCL INFO comm 0xaafdc050 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub052:2277063:2277063 [1] NCCL INFO cudaDriverVersion 12010 +gpub052:2277063:2277063 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277063:2277063 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277063:2277140 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277063:2277140 [1] NCCL INFO Using network IB +gpub052:2277063:2277140 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:2277063:2277140 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all rings +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO 
Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all trees +gpub052:2277063:2277140 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277063:2277140 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277063:2277140 [1] NCCL INFO comm 0xa865590 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789794:2789794 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2789794:2789794 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789794:2789794 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789794:2789873 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789794:2789873 [1] NCCL INFO Using network IB +gpub008:2789794:2789873 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2789794:2789873 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all rings +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all trees +gpub008:2789794:2789873 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789794:2789873 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789794:2789873 [1] NCCL INFO comm 0x8abbf8b0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2108201:2108201 [2] NCCL INFO cudaDriverVersion 12010 +gpub002:2108201:2108201 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108201:2108201 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108201:2108276 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108201:2108276 [2] NCCL INFO Using network IB +gpub002:2108201:2108276 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub002:2108201:2108276 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all rings +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all trees +gpub002:2108201:2108276 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108201:2108276 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108201:2108276 [2] NCCL INFO comm 0x8ca2cb90 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225330:3225330 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:3225330:3225330 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225330:3225330 [2] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpub051:3225330:3225408 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225330:3225408 [2] NCCL INFO Using network IB +gpub051:3225330:3225408 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:3225330:3225408 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all rings +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all trees +gpub051:3225330:3225408 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225330:3225408 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225330:3225408 [2] NCCL INFO comm 0x4f59a920 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225331:3225331 [3] NCCL INFO cudaDriverVersion 12010 +gpub051:3225331:3225331 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225331:3225331 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225331:3225406 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225331:3225406 [3] NCCL INFO Using network IB +gpub051:3225331:3225406 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:3225331:3225406 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Connected all rings +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Connected all trees +gpub051:3225331:3225406 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225331:3225406 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225331:3225406 [3] NCCL INFO comm 0xb371b610 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2277065:2277065 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:2277065:2277065 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277065:2277065 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277065:2277139 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277065:2277139 [3] NCCL INFO Using network IB +gpub052:2277065:2277139 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2277065:2277139 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Connected all rings +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO 
Connected all trees +gpub052:2277065:2277139 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277065:2277139 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277065:2277139 [3] NCCL INFO comm 0x8f38890 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
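The per-rank NCCL INFO lines above are emitted while torch.distributed brings up the 64-rank NCCL communicator; this level of detail only appears when NCCL debug logging is enabled. A minimal sketch of a launch that produces this kind of output, assuming a generic environment (the init-method path and the WORLD_SIZE/RANK lookups are illustrative placeholders, not values taken from this job):

    import os
    import torch.distributed as dist

    os.environ.setdefault("NCCL_DEBUG", "INFO")  # emit the "NCCL INFO ..." setup lines

    # Rank wiring is illustrative; real launchers (srun, torchrun) provide it.
    dist.init_process_group(
        backend="nccl",
        init_method="file:///path/to/shared/.dist_init",  # placeholder shared file
        world_size=int(os.environ.get("WORLD_SIZE", "64")),
        rank=int(os.environ.get("RANK", "0")),
    )

Once every rank reports "Init COMPLETE", collective communication is ready: the P2P/IPC entries are intra-node GPU links, while the NET/IB entries are inter-node RoCE/InfiniBand links over mlx5_0.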
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
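The reducer warning above comes from PyTorch DDP: with find_unused_parameters=True, the reducer walks the autograd graph after every backward pass to find parameters that received no gradient, and here it never finds any, so each iteration pays for an unnecessary traversal. A minimal sketch of the change the warning recommends, assuming an already-initialized process group (the Linear module is a stand-in, not ESPnet's actual model wrapping):

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    # Assumes torch.distributed is already initialized, as in the launch above.
    model = torch.nn.Linear(8, 8).cuda()  # stand-in for the real S2T model
    ddp_model = DDP(
        model,
        device_ids=[torch.cuda.current_device()],
        find_unused_parameters=False,  # skip the per-iteration unused-parameter search
    )

As the warning itself notes, the flag should stay on if control flow in the model can leave some parameters unused in later iterations.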
+[gpub002:0/64] 2023-07-12 13:27:01,090 (trainer:732) INFO: 40epoch:train:1-100batch: iter_time=1.208, forward_time=0.235, loss_ctc=61.116, loss_att=44.050, acc=0.697, loss=49.170, backward_time=1.036, grad_norm=103.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.735e-05, train_time=8.676
+[gpub002:0/64] 2023-07-12 13:29:17,100 (trainer:732) INFO: 40epoch:train:101-200batch: iter_time=1.264e-04, forward_time=0.142, loss_ctc=72.135, loss_att=57.235, acc=0.700, loss=61.705, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.721
+[gpub002:0/64] 2023-07-12 13:31:32,346 (trainer:732) INFO: 40epoch:train:201-300batch: iter_time=1.291e-04, forward_time=0.142, loss_ctc=81.043, loss_att=57.325, acc=0.717, loss=64.440, backward_time=1.025, grad_norm=163.403, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.705
+[gpub002:0/64] 2023-07-12 13:33:47,706 (trainer:732) INFO: 40epoch:train:301-400batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=71.829, loss_att=55.277, acc=0.697, loss=60.243, backward_time=1.026, grad_norm=110.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.733e-05, train_time=2.707
+[gpub002:0/64] 2023-07-12 13:36:02,885 (trainer:732) INFO: 40epoch:train:401-500batch: iter_time=1.285e-04, forward_time=0.143, loss_ctc=72.347, loss_att=53.215, acc=0.711, loss=58.954, backward_time=1.025, grad_norm=113.868, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.732e-05, train_time=2.703
+[gpub002:0/64] 2023-07-12 13:38:27,982 (trainer:732) INFO: 40epoch:train:501-600batch: iter_time=1.282e-04, forward_time=0.141, loss_ctc=68.400, loss_att=46.292, acc=0.689, loss=52.924, backward_time=1.032, grad_norm=117.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.902
+[gpub002:0/64] 2023-07-12 13:40:47,434 (trainer:732) INFO: 40epoch:train:601-700batch: iter_time=1.231e-04, forward_time=0.142, loss_ctc=68.083, loss_att=50.252, acc=0.715, loss=55.601, backward_time=1.028, grad_norm=108.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.789
+[gpub002:0/64] 2023-07-12 13:43:10,107 (trainer:732) INFO: 40epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.142, loss_ctc=68.676, loss_att=52.334, acc=0.698, loss=57.237, backward_time=1.024, grad_norm=108.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.730e-05, train_time=2.852
+[gpub002:0/64] 2023-07-12 13:44:08,310 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
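Each trainer line reports the CTC branch loss, the attention branch loss, and a combined loss. The logged values are consistent with a hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3; the weight is inferred from the numbers themselves rather than stated anywhere in this log. A quick check against the first two entries above:

    # Assumption: ctc_weight = 0.3, inferred from the logged numbers.
    ctc_weight = 0.3
    for loss_ctc, loss_att, loss in [
        (61.116, 44.050, 49.170),  # 40epoch:train:1-100batch
        (72.135, 57.235, 61.705),  # 40epoch:train:101-200batch
    ]:
        combined = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
        assert abs(combined - loss) < 1e-2, (combined, loss)

The loss_scale field and the later "The grad norm is nan. Skipping updating the model." warning are the usual signature of dynamic loss scaling under mixed precision: steps that overflow are skipped rather than applied, and the scale is adjusted over time (note the move from 3.245e+32 to 5.192e+32 and 6.490e+32 further down).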
+[gpub002:0/64] 2023-07-12 13:44:25,786 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-12 13:44:29,176 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 13:49:29,871 (trainer:732) INFO: 40epoch:train:801-900batch: iter_time=2.128, forward_time=0.187, loss_ctc=62.142, loss_att=45.987, acc=0.699, loss=50.833, backward_time=1.043, grad_norm=127.878, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.729e-05, train_time=7.596
+[gpub002:0/64] 2023-07-12 13:51:45,925 (trainer:732) INFO: 40epoch:train:901-1000batch: iter_time=1.253e-04, forward_time=0.144, loss_ctc=73.026, loss_att=58.214, acc=0.710, loss=62.657, backward_time=1.025, grad_norm=124.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.728e-05, train_time=2.721
+[gpub002:0/64] 2023-07-12 13:54:01,587 (trainer:732) INFO: 40epoch:train:1001-1100batch: iter_time=1.277e-04, forward_time=0.143, loss_ctc=79.141, loss_att=57.052, acc=0.725, loss=63.679, backward_time=1.025, grad_norm=151.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.713
+[gpub002:0/64] 2023-07-12 13:56:17,421 (trainer:732) INFO: 40epoch:train:1101-1200batch: iter_time=1.301e-04, forward_time=0.143, loss_ctc=71.202, loss_att=55.122, acc=0.705, loss=59.946, backward_time=1.026, grad_norm=97.471, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.716
+[gpub002:0/64] 2023-07-12 13:58:33,114 (trainer:732) INFO: 40epoch:train:1201-1300batch: iter_time=1.240e-04, forward_time=0.143, loss_ctc=71.459, loss_att=52.133, acc=0.724, loss=57.931, backward_time=1.025, grad_norm=140.565, clip=100.000, loss_scale=5.192e+32, optim_step_time=0.179, optim0_lr0=5.726e-05, train_time=2.714
+[gpub002:0/64] 2023-07-12 14:00:48,389 (trainer:732) INFO: 40epoch:train:1301-1400batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=65.305, loss_att=45.866, acc=0.695, loss=51.698, backward_time=1.021, grad_norm=108.278, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.725e-05, train_time=2.705
+[gpub002:0/64] 2023-07-12 14:03:05,877 (trainer:732) INFO: 40epoch:train:1401-1500batch: iter_time=1.239e-04, forward_time=0.142, loss_ctc=68.643, loss_att=50.860, acc=0.714, loss=56.195, backward_time=1.025, grad_norm=114.462, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, train_time=2.750
+[gpub002:0/64] 2023-07-12 14:05:24,000 (trainer:732) INFO: 40epoch:train:1501-1600batch: iter_time=1.178e-04, forward_time=0.143, loss_ctc=67.303, loss_att=51.300, acc=0.712, loss=56.101, backward_time=1.026, grad_norm=99.620, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, train_time=2.762
+[gpub002:0/64] 2023-07-12 14:06:37,008 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-12 14:06:56,326 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-12 14:07:14,096 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-12 14:07:17,519 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 14:12:18,848 (trainer:732) INFO: 40epoch:train:1601-1700batch: iter_time=1.255, forward_time=0.143, loss_ctc=66.208, loss_att=50.001, acc=0.701, loss=54.863, backward_time=1.030, grad_norm=128.956, clip=100.000, loss_scale=4.967e+32, optim_step_time=0.179, optim0_lr0=5.723e-05, train_time=8.297
+[gpub002:0/64] 2023-07-12 14:14:35,280 (trainer:732) INFO: 40epoch:train:1701-1800batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=67.632, loss_att=51.671, acc=0.709, loss=56.460, backward_time=1.028, grad_norm=131.230, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.722e-05, train_time=2.728
+[gpub002:0/64] 2023-07-12 14:16:51,826 (trainer:732) INFO: 40epoch:train:1801-1900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=77.856, loss_att=57.061, acc=0.718, loss=63.299, backward_time=1.031, grad_norm=127.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.722e-05, train_time=2.731
+[gpub002:0/64] 2023-07-12 14:19:07,735 (trainer:732) INFO: 40epoch:train:1901-2000batch: iter_time=1.083e-04, forward_time=0.143, loss_ctc=74.187, loss_att=54.012, acc=0.723, loss=60.065, backward_time=1.029, grad_norm=134.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.721e-05, train_time=2.718
+[gpub002:0/64] 2023-07-12 14:21:23,408 (trainer:732) INFO: 40epoch:train:2001-2100batch: iter_time=1.295e-04, forward_time=0.143, loss_ctc=71.655, loss_att=56.017, acc=0.709, loss=60.708, backward_time=1.025, grad_norm=113.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.720e-05, train_time=2.713
+[gpub002:0/64] 2023-07-12 14:23:38,786 (trainer:732) INFO: 40epoch:train:2101-2200batch: iter_time=1.211e-04, forward_time=0.143, loss_ctc=68.732, loss_att=48.850, acc=0.703, loss=54.815, backward_time=1.022, grad_norm=114.122, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.707
+[gpub002:0/64] 2023-07-12 14:25:54,333 (trainer:732) INFO: 40epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=63.319, loss_att=45.815, acc=0.718, loss=51.066, backward_time=1.024, grad_norm=112.742, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.711
+[gpub002:0/64] 2023-07-12 14:28:11,996 (trainer:732) INFO: 40epoch:train:2301-2400batch: iter_time=1.254e-04, forward_time=0.143, loss_ctc=66.195, loss_att=50.742, acc=0.715, loss=55.378, backward_time=1.023, grad_norm=102.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.718e-05, train_time=2.753
+[gpub002:0/64] 2023-07-12 14:30:30,321 (trainer:732) INFO: 40epoch:train:2401-2500batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=71.093, loss_att=53.758, acc=0.706, loss=58.959, backward_time=1.027, grad_norm=115.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.717e-05, train_time=2.766
+[gpub002:0/64] 2023-07-12 14:30:32,911 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-12 14:30:50,993 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-12 14:30:54,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 14:37:48,647 (trainer:732) INFO: 40epoch:train:2501-2600batch: iter_time=1.257, forward_time=0.143, loss_ctc=61.405, loss_att=46.625, acc=0.694, loss=51.059, backward_time=1.033, grad_norm=109.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=8.766
+[gpub002:0/64] 2023-07-12 14:40:05,295 (trainer:732) INFO: 40epoch:train:2601-2700batch: iter_time=1.359e-04, forward_time=0.144, loss_ctc=69.964, loss_att=53.049, acc=0.717, loss=58.124, backward_time=1.027, grad_norm=135.636, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=2.733
+[gpub002:0/64] 2023-07-12 14:42:21,035 (trainer:732) INFO: 40epoch:train:2701-2800batch: iter_time=1.229e-04, forward_time=0.144, loss_ctc=77.512, loss_att=56.118, acc=0.709, loss=62.537, backward_time=1.025, grad_norm=137.980, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.715e-05, train_time=2.715
+[gpub002:0/64] 2023-07-12 14:44:36,479 (trainer:732) INFO: 40epoch:train:2801-2900batch: iter_time=1.048e-04, forward_time=0.142, loss_ctc=71.235, loss_att=56.169, acc=0.706, loss=60.689, backward_time=1.023, grad_norm=120.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.714e-05, train_time=2.709
+[gpub002:0/64] 2023-07-12 14:46:51,759 (trainer:732) INFO: 40epoch:train:2901-3000batch: iter_time=1.043e-04, forward_time=0.143, loss_ctc=68.448, loss_att=51.921, acc=0.704, loss=56.879, backward_time=1.022, grad_norm=104.401, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.705
+[gpub002:0/64] 2023-07-12 14:49:07,671 (trainer:732) INFO: 40epoch:train:3001-3100batch: iter_time=1.139e-04, forward_time=0.142, loss_ctc=66.825, loss_att=44.975, acc=0.706, loss=51.530, backward_time=1.021, grad_norm=103.993, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.718
+[gpub002:0/64] 2023-07-12 14:51:28,934 (trainer:732) INFO: 40epoch:train:3101-3200batch: iter_time=1.132e-04, forward_time=0.142, loss_ctc=65.918, loss_att=48.901, acc=0.716, loss=54.006, backward_time=1.027, grad_norm=102.075, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.712e-05, train_time=2.825
+[gpub002:0/64] 2023-07-12 14:53:49,088 (trainer:732) INFO: 40epoch:train:3201-3300batch: iter_time=1.065e-04, forward_time=0.142, loss_ctc=69.493, loss_att=54.998, acc=0.698, loss=59.346, backward_time=1.045, grad_norm=117.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.711e-05, train_time=2.803
+[gpub002:0/64] 2023-07-12 14:54:39,670 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-12 14:54:57,543 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-12 14:55:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-12 15:00:02,934 (trainer:732) INFO: 40epoch:train:3301-3400batch: iter_time=1.259, forward_time=0.143, loss_ctc=63.620, loss_att=45.338, acc=0.714, loss=50.822, backward_time=1.049, grad_norm=109.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=7.477
+[gpub002:0/64] 2023-07-12 15:02:20,429 (trainer:732) INFO: 40epoch:train:3401-3500batch: iter_time=1.191e-04, forward_time=0.143, loss_ctc=66.528, loss_att=49.949, acc=0.714, loss=54.922, backward_time=1.026, grad_norm=125.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=2.750
+[gpub002:0/64] 2023-07-12 15:04:36,303 (trainer:732) INFO: 40epoch:train:3501-3600batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=76.092, loss_att=55.412, acc=0.724, loss=61.616, backward_time=1.026, grad_norm=122.114, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.709e-05, train_time=2.717
+[gpub002:0/64] 2023-07-12 15:06:52,169 (trainer:732) INFO: 40epoch:train:3601-3700batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=72.680, loss_att=53.035, acc=0.724, loss=58.929, backward_time=1.025, grad_norm=97.254, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.708e-05, train_time=2.717
+[gpub002:0/64] 2023-07-12 15:09:07,796 (trainer:732) INFO: 40epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.143, loss_ctc=69.121, loss_att=53.328, acc=0.720, loss=58.066, backward_time=1.024, grad_norm=139.481, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.712
+[gpub002:0/64] 2023-07-12 15:11:23,110 (trainer:732) INFO: 40epoch:train:3801-3900batch: iter_time=1.205e-04,
forward_time=0.143, loss_ctc=69.056, loss_att=47.903, acc=0.705, loss=54.249, backward_time=1.021, grad_norm=121.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:13:40,798 (trainer:732) INFO: 40epoch:train:3901-4000batch: iter_time=1.128e-04, forward_time=0.143, loss_ctc=65.887, loss_att=48.895, acc=0.717, loss=53.992, backward_time=1.027, grad_norm=104.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.706e-05, train_time=2.754 +[gpub002:0/64] 2023-07-12 15:16:02,209 (trainer:732) INFO: 40epoch:train:4001-4100batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=66.041, loss_att=51.439, acc=0.707, loss=55.819, backward_time=1.028, grad_norm=112.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.705e-05, train_time=2.828 +[gpub002:0/64] 2023-07-12 15:17:39,601 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 15:17:57,438 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 15:18:00,978 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:21:47,365 (trainer:732) INFO: 40epoch:train:4101-4200batch: iter_time=1.250, forward_time=0.144, loss_ctc=68.950, loss_att=50.746, acc=0.718, loss=56.207, backward_time=1.041, grad_norm=106.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=6.903 +[gpub002:0/64] 2023-07-12 15:24:03,354 (trainer:732) INFO: 40epoch:train:4201-4300batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=62.707, loss_att=48.487, acc=0.702, loss=52.753, backward_time=1.026, grad_norm=100.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 15:26:19,013 (trainer:732) INFO: 40epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=69.760, loss_att=52.370, acc=0.725, loss=57.587, backward_time=1.023, grad_norm=106.670, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.703e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 15:28:34,645 (trainer:732) INFO: 40epoch:train:4401-4500batch: iter_time=1.100e-04, forward_time=0.143, loss_ctc=78.815, loss_att=57.161, acc=0.718, loss=63.657, backward_time=1.024, grad_norm=126.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.702e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 15:30:50,128 (trainer:732) INFO: 40epoch:train:4501-4600batch: iter_time=1.185e-04, forward_time=0.143, loss_ctc=67.843, loss_att=54.104, acc=0.716, loss=58.226, backward_time=1.023, grad_norm=113.696, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, 
train_time=2.709 +[gpub002:0/64] 2023-07-12 15:33:05,671 (trainer:732) INFO: 40epoch:train:4601-4700batch: iter_time=1.154e-04, forward_time=0.143, loss_ctc=70.374, loss_att=50.230, acc=0.723, loss=56.273, backward_time=1.023, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:35:20,996 (trainer:732) INFO: 40epoch:train:4701-4800batch: iter_time=1.161e-04, forward_time=0.143, loss_ctc=64.594, loss_att=44.965, acc=0.707, loss=50.854, backward_time=1.022, grad_norm=106.973, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.700e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:37:36,540 (trainer:732) INFO: 40epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.143, loss_ctc=67.322, loss_att=50.435, acc=0.717, loss=55.501, backward_time=1.024, grad_norm=103.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.699e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:39:52,011 (trainer:732) INFO: 40epoch:train:4901-5000batch: iter_time=1.164e-04, forward_time=0.143, loss_ctc=70.025, loss_att=54.199, acc=0.712, loss=58.947, backward_time=1.023, grad_norm=113.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.698e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 15:39:54,676 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 15:40:13,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 15:40:16,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:45:20,743 (trainer:732) INFO: 40epoch:train:5001-5100batch: iter_time=1.263, forward_time=0.180, loss_ctc=60.084, loss_att=45.552, acc=0.702, loss=49.912, backward_time=1.033, grad_norm=102.864, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.698e-05, train_time=6.574 +[gpub002:0/64] 2023-07-12 15:47:37,331 (trainer:732) INFO: 40epoch:train:5101-5200batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=70.511, loss_att=52.939, acc=0.720, loss=58.210, backward_time=1.024, grad_norm=103.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.697e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 15:49:58,905 (trainer:732) INFO: 40epoch:train:5201-5300batch: iter_time=1.112e-04, forward_time=0.142, loss_ctc=77.354, loss_att=56.014, acc=0.710, loss=62.416, backward_time=1.023, grad_norm=133.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.696e-05, train_time=2.831 +[gpub002:0/64] 2023-07-12 15:52:14,432 (trainer:732) INFO: 40epoch:train:5301-5400batch: iter_time=1.243e-04, forward_time=0.143, loss_ctc=68.506, loss_att=54.652, acc=0.711, 
loss=58.808, backward_time=1.025, grad_norm=114.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.695e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 15:54:29,792 (trainer:732) INFO: 40epoch:train:5401-5500batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=68.380, loss_att=50.824, acc=0.707, loss=56.091, backward_time=1.023, grad_norm=186.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.695e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 15:56:44,731 (trainer:732) INFO: 40epoch:train:5501-5600batch: iter_time=1.176e-04, forward_time=0.142, loss_ctc=65.993, loss_att=45.437, acc=0.706, loss=51.604, backward_time=1.019, grad_norm=104.703, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.694e-05, train_time=2.699 +[gpub002:0/64] 2023-07-12 15:58:59,893 (trainer:732) INFO: 40epoch:train:5601-5700batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=65.646, loss_att=48.820, acc=0.718, loss=53.868, backward_time=1.022, grad_norm=121.882, clip=100.000, loss_scale=4.738e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.703 +[gpub002:0/64] 2023-07-12 16:01:15,393 (trainer:732) INFO: 40epoch:train:5701-5800batch: iter_time=1.205e-04, forward_time=0.143, loss_ctc=69.173, loss_att=55.052, acc=0.701, loss=59.288, backward_time=1.025, grad_norm=113.446, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 16:02:13,718 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-12 16:02:31,777 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:02:35,214 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:02:35,215 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 16:02:35,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:06:51,419 (trainer:732) INFO: 40epoch:train:5801-5900batch: iter_time=1.911, forward_time=0.144, loss_ctc=61.992, loss_att=46.321, acc=0.712, loss=51.022, backward_time=1.033, grad_norm=119.027, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.692e-05, train_time=6.720 +[gpub002:0/64] 2023-07-12 16:09:07,218 (trainer:732) INFO: 40epoch:train:5901-6000batch: iter_time=1.335e-04, forward_time=0.143, loss_ctc=66.798, loss_att=49.972, acc=0.708, loss=55.019, backward_time=1.023, grad_norm=108.107, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.691e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:11:22,903 (trainer:732) INFO: 40epoch:train:6001-6100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=76.895, loss_att=56.337, acc=0.716, loss=62.504, backward_time=1.024, grad_norm=192.818, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 16:13:38,302 
(trainer:732) INFO: 40epoch:train:6101-6200batch: iter_time=1.439e-04, forward_time=0.144, loss_ctc=73.992, loss_att=52.793, acc=0.718, loss=59.152, backward_time=1.022, grad_norm=97.949, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:15:53,887 (trainer:732) INFO: 40epoch:train:6201-6300batch: iter_time=1.375e-04, forward_time=0.144, loss_ctc=69.085, loss_att=53.781, acc=0.709, loss=58.372, backward_time=1.024, grad_norm=107.327, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.689e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:18:09,295 (trainer:732) INFO: 40epoch:train:6301-6400batch: iter_time=1.276e-04, forward_time=0.145, loss_ctc=67.794, loss_att=47.222, acc=0.705, loss=53.394, backward_time=1.023, grad_norm=105.215, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.688e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:20:24,498 (trainer:732) INFO: 40epoch:train:6401-6500batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=65.690, loss_att=47.722, acc=0.718, loss=53.112, backward_time=1.023, grad_norm=125.378, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 16:22:12,380 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 16:22:39,598 (trainer:732) INFO: 40epoch:train:6501-6600batch: iter_time=1.096e-04, forward_time=0.144, loss_ctc=64.236, loss_att=49.951, acc=0.705, loss=54.237, backward_time=1.023, grad_norm=93.202, clip=100.000, loss_scale=5.828e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.702 +[gpub002:0/64] 2023-07-12 16:24:14,970 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
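Note: the constant loss_scale fields above and the two "The grad norm is nan. Skipping updating the model." warnings in this epoch are the signature of dynamic loss scaling under mixed-precision training: on an overflow the optimizer step is skipped and the scale is backed off by half, which matches the logged drop from 6.490e+32 to 3.245e+32 after the first warning (the in-between 4.967e+32 and 5.828e+32 readings are 100-batch window averages). A minimal sketch of that mechanism with torch.cuda.amp follows; it illustrates the pattern, not ESPnet's actual trainer code, and train_step and max_norm=100.0 are assumptions (the clip=100.000 field suggests but does not confirm the threshold).

import torch

scaler = torch.cuda.amp.GradScaler()  # grows the scale on success, halves it on overflow

def train_step(model, batch, optimizer):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(**batch)                    # forward pass in reduced precision
    scaler.scale(loss).backward()                # backward on the scaled loss
    scaler.unscale_(optimizer)                   # restore true gradient magnitudes
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        # mirrors the WARNING above: the step below becomes a no-op and
        # GradScaler halves its scale when it sees the non-finite grads
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)                       # silently skipped if grads are inf/nan
    scaler.update()                              # adjusts the dynamic loss scale
    return loss.detach(), grad_norm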
+[gpub002:0/64] 2023-07-12 16:24:32,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 16:24:36,521 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:28:25,470 (trainer:732) INFO: 40epoch:train:6601-6700batch: iter_time=1.255, forward_time=0.145, loss_ctc=68.337, loss_att=49.507, acc=0.724, loss=55.156, backward_time=1.038, grad_norm=116.207, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.686e-05, train_time=6.917 +[gpub002:0/64] 2023-07-12 16:30:42,084 (trainer:732) INFO: 40epoch:train:6701-6800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=62.314, loss_att=47.201, acc=0.706, loss=51.735, backward_time=1.025, grad_norm=98.754, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.685e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:32:57,908 (trainer:732) INFO: 40epoch:train:6801-6900batch: iter_time=1.346e-04, forward_time=0.145, loss_ctc=68.481, loss_att=52.287, acc=0.727, loss=57.145, backward_time=1.026, grad_norm=109.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:35:13,486 (trainer:732) INFO: 40epoch:train:6901-7000batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=79.843, loss_att=57.285, acc=0.717, loss=64.053, backward_time=1.026, grad_norm=130.573, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:37:29,332 (trainer:732) INFO: 40epoch:train:7001-7100batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=67.417, loss_att=53.169, acc=0.721, loss=57.444, backward_time=1.027, grad_norm=147.307, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.683e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 16:39:44,891 (trainer:732) INFO: 40epoch:train:7101-7200batch: iter_time=1.272e-04, forward_time=0.145, loss_ctc=70.180, loss_att=50.528, acc=0.727, loss=56.424, backward_time=1.024, grad_norm=107.531, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.682e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:42:05,273 (trainer:732) INFO: 40epoch:train:7201-7300batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=63.893, loss_att=44.742, acc=0.708, loss=50.488, backward_time=1.023, grad_norm=103.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.681e-05, train_time=2.807 +[gpub002:0/64] 2023-07-12 16:44:20,746 (trainer:732) INFO: 40epoch:train:7301-7400batch: iter_time=1.136e-04, forward_time=0.143, loss_ctc=67.728, loss_att=50.827, acc=0.719, loss=55.897, backward_time=1.024, grad_norm=118.243, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, 
optim0_lr0=5.681e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:46:36,313 (trainer:732) INFO: 40epoch:train:7401-7500batch: iter_time=1.123e-04, forward_time=0.143, loss_ctc=69.549, loss_att=54.017, acc=0.715, loss=58.676, backward_time=1.024, grad_norm=105.138, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.680e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:46:39,103 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 16:46:57,326 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 16:47:00,722 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:52:43,273 (trainer:732) INFO: 40epoch:train:7501-7600batch: iter_time=1.295, forward_time=0.144, loss_ctc=58.813, loss_att=42.145, acc=0.710, loss=47.145, backward_time=1.037, grad_norm=124.111, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=7.339 +[gpub002:0/64] 2023-07-12 16:54:59,882 (trainer:732) INFO: 40epoch:train:7601-7700batch: iter_time=1.243e-04, forward_time=0.144, loss_ctc=68.732, loss_att=53.937, acc=0.713, loss=58.375, backward_time=1.029, grad_norm=130.599, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:57:15,353 (trainer:732) INFO: 40epoch:train:7701-7800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=77.851, loss_att=55.826, acc=0.727, loss=62.433, backward_time=1.025, grad_norm=121.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.678e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:59:30,902 (trainer:732) INFO: 40epoch:train:7801-7900batch: iter_time=1.444e-04, forward_time=0.144, loss_ctc=70.346, loss_att=53.701, acc=0.708, loss=58.695, backward_time=1.026, grad_norm=112.362, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.677e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 17:01:46,962 (trainer:732) INFO: 40epoch:train:7901-8000batch: iter_time=1.644e-04, forward_time=0.145, loss_ctc=69.909, loss_att=51.923, acc=0.720, loss=57.319, backward_time=1.024, grad_norm=115.655, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 17:04:02,181 (trainer:732) INFO: 40epoch:train:8001-8100batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=64.797, loss_att=45.585, acc=0.695, loss=51.348, backward_time=1.021, grad_norm=96.860, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 17:06:18,006 (trainer:732) INFO: 40epoch:train:8101-8200batch: iter_time=1.347e-04, forward_time=0.144, loss_ctc=65.946, loss_att=48.537, 
acc=0.721, loss=53.759, backward_time=1.025, grad_norm=103.143, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.675e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:08:43,714 (trainer:732) INFO: 40epoch:train:8201-8300batch: iter_time=1.585e-04, forward_time=0.144, loss_ctc=66.556, loss_att=50.587, acc=0.709, loss=55.378, backward_time=1.033, grad_norm=104.722, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.674e-05, train_time=2.914 +[gpub002:0/64] 2023-07-12 17:09:31,672 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 17:09:49,815 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 17:09:53,291 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:16:05,942 (trainer:732) INFO: 40epoch:train:8301-8400batch: iter_time=1.764, forward_time=0.145, loss_ctc=61.574, loss_att=47.462, acc=0.713, loss=51.695, backward_time=1.040, grad_norm=100.876, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=8.844 +[gpub002:0/64] 2023-07-12 17:18:22,844 (trainer:732) INFO: 40epoch:train:8401-8500batch: iter_time=1.222e-04, forward_time=0.144, loss_ctc=66.028, loss_att=49.519, acc=0.712, loss=54.472, backward_time=1.025, grad_norm=100.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=2.738 +[gpub002:0/64] 2023-07-12 17:20:39,127 (trainer:732) INFO: 40epoch:train:8501-8600batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=75.776, loss_att=55.115, acc=0.722, loss=61.313, backward_time=1.028, grad_norm=133.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.672e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 17:22:54,520 (trainer:732) INFO: 40epoch:train:8601-8700batch: iter_time=1.148e-04, forward_time=0.143, loss_ctc=71.264, loss_att=51.115, acc=0.722, loss=57.159, backward_time=1.024, grad_norm=108.773, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 17:25:10,311 (trainer:732) INFO: 40epoch:train:8701-8800batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=70.093, loss_att=53.908, acc=0.711, loss=58.763, backward_time=1.027, grad_norm=115.711, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:27:25,641 (trainer:732) INFO: 40epoch:train:8801-8900batch: iter_time=1.147e-04, forward_time=0.143, loss_ctc=65.537, loss_att=45.778, acc=0.707, loss=51.706, backward_time=1.025, grad_norm=109.775, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.670e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 17:29:40,885 
(trainer:732) INFO: 40epoch:train:8901-9000batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=64.933, loss_att=47.129, acc=0.721, loss=52.470, backward_time=1.024, grad_norm=108.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.669e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 17:31:56,246 (trainer:732) INFO: 40epoch:train:9001-9100batch: iter_time=1.121e-04, forward_time=0.143, loss_ctc=64.752, loss_att=50.086, acc=0.708, loss=54.486, backward_time=1.024, grad_norm=114.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.668e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 17:33:29,921 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 17:33:48,221 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:33:51,742 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:33:51,743 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 17:33:51,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:38:46,300 (trainer:732) INFO: 40epoch:train:9101-9200batch: iter_time=1.947, forward_time=0.180, loss_ctc=68.492, loss_att=49.647, acc=0.722, loss=55.300, backward_time=1.038, grad_norm=117.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.668e-05, train_time=8.201 +[gpub002:0/64] 2023-07-12 17:41:02,935 (trainer:732) INFO: 40epoch:train:9201-9300batch: iter_time=1.099e-04, forward_time=0.145, loss_ctc=61.780, loss_att=48.408, acc=0.705, loss=52.420, backward_time=1.025, grad_norm=130.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.667e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 17:43:20,427 (trainer:732) INFO: 40epoch:train:9301-9400batch: iter_time=1.202e-04, forward_time=0.144, loss_ctc=68.219, loss_att=52.508, acc=0.732, loss=57.222, backward_time=1.027, grad_norm=114.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.666e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 17:45:36,344 (trainer:732) INFO: 40epoch:train:9401-9500batch: iter_time=9.479e-05, forward_time=0.142, loss_ctc=78.737, loss_att=56.410, acc=0.723, loss=63.108, backward_time=1.025, grad_norm=120.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 17:47:51,855 (trainer:732) INFO: 40epoch:train:9501-9600batch: iter_time=9.460e-05, forward_time=0.143, loss_ctc=66.156, loss_att=54.034, acc=0.715, loss=57.671, backward_time=1.023, grad_norm=119.804, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 17:50:07,301 (trainer:732) INFO: 40epoch:train:9601-9700batch: iter_time=9.742e-05, forward_time=0.143, loss_ctc=69.405, loss_att=50.896, acc=0.723, loss=56.449, backward_time=1.024, grad_norm=105.723, 
clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.664e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 17:52:22,352 (trainer:732) INFO: 40epoch:train:9701-9800batch: iter_time=1.037e-04, forward_time=0.142, loss_ctc=62.756, loss_att=43.537, acc=0.712, loss=49.303, backward_time=1.022, grad_norm=114.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.701 +[gpub002:0/64] 2023-07-12 17:54:44,234 (trainer:732) INFO: 40epoch:train:9801-9900batch: iter_time=9.713e-05, forward_time=0.143, loss_ctc=67.843, loss_att=50.651, acc=0.721, loss=55.809, backward_time=1.030, grad_norm=112.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.837 +[gpub002:0/64] 2023-07-12 17:57:01,502 (trainer:732) INFO: 40epoch:train:9901-10000batch: iter_time=9.865e-05, forward_time=0.141, loss_ctc=69.656, loss_att=53.667, acc=0.714, loss=58.464, backward_time=1.028, grad_norm=130.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.662e-05, train_time=2.745 +[gpub002:0/64] 2023-07-12 18:11:36,573 (trainer:338) INFO: 40epoch results: [train] iter_time=0.178, forward_time=0.145, loss_ctc=68.748, loss_att=51.163, acc=0.712, loss=56.438, backward_time=1.027, grad_norm=116.730, clip=100.000, loss_scale=3.679e+32, optim_step_time=0.180, optim0_lr0=5.698e-05, train_time=3.327, time=4 hours, 37 minutes and 31.92 seconds, total_count=370000, gpu_max_cached_mem_GB=34.277, [valid] loss_ctc=44.137, cer_ctc=0.263, loss_att=39.500, acc=0.667, cer=0.428, wer=1.000, loss=40.891, time=7 minutes and 44.49 seconds, total_count=37950, gpu_max_cached_mem_GB=37.572, [att_plot] time=6 minutes and 32.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 18:11:52,961 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 18:11:53,003 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:12:44,008 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,270 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/35epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,326 (trainer:272) INFO: 41/50epoch started. Estimated time to finish: 2 days, 53 minutes and 30.81 seconds +[gpub002:0/64] 2023-07-12 18:13:09,583 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
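Note: the epoch-40 summary above makes the loss composition directly checkable: the reported loss is a weighted interpolation of loss_ctc and loss_att, and every logged window fits a 0.3/0.7 split (e.g. 0.3 * 62.142 + 0.7 * 45.987 = 50.833 for the first window of the epoch). The actual ctc_weight lives in the training config, which this log never prints, so the 0.3 below is inferred from the numbers rather than quoted from a setting. A quick check in Python:

# values copied from the epoch-40 [train] summary above
loss_ctc, loss_att, loss = 68.748, 51.163, 56.438
ctc_weight = 0.3                                   # inferred, not read from the config
recon = ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att
assert abs(recon - loss) < 1e-2                    # 56.4385 vs. the logged 56.438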
+[gpub002:0/64] 2023-07-12 18:13:27,325 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 18:13:30,878 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:21:17,615 (trainer:732) INFO: 41epoch:train:1-100batch: iter_time=3.473, forward_time=0.171, loss_ctc=70.840, loss_att=55.420, acc=0.704, loss=60.046, backward_time=1.043, grad_norm=129.418, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.661e-05, train_time=9.772 +[gpub002:0/64] 2023-07-12 18:23:33,343 (trainer:732) INFO: 41epoch:train:101-200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.748, loss_att=50.057, acc=0.705, loss=55.364, backward_time=1.028, grad_norm=114.263, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 18:25:50,201 (trainer:732) INFO: 41epoch:train:201-300batch: iter_time=1.232e-04, forward_time=0.152, loss_ctc=66.301, loss_att=51.918, acc=0.721, loss=56.233, backward_time=1.028, grad_norm=123.052, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.737 +[gpub002:0/64] 2023-07-12 18:28:19,026 (trainer:732) INFO: 41epoch:train:301-400batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=75.210, loss_att=59.758, acc=0.716, loss=64.394, backward_time=1.054, grad_norm=126.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.659e-05, train_time=2.976 +[gpub002:0/64] 2023-07-12 18:30:40,311 (trainer:732) INFO: 41epoch:train:401-500batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=60.935, loss_att=47.224, acc=0.714, loss=51.338, backward_time=1.039, grad_norm=134.990, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.658e-05, train_time=2.825 +[gpub002:0/64] 2023-07-12 18:32:59,302 (trainer:732) INFO: 41epoch:train:501-600batch: iter_time=1.050e-04, forward_time=0.147, loss_ctc=75.633, loss_att=61.020, acc=0.718, loss=65.404, backward_time=1.032, grad_norm=146.761, clip=100.000, loss_scale=3.894e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 18:35:20,968 (trainer:732) INFO: 41epoch:train:601-700batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=70.522, loss_att=53.175, acc=0.708, loss=58.379, backward_time=1.040, grad_norm=114.115, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.833 +[gpub002:0/64] 2023-07-12 18:37:47,401 (trainer:732) INFO: 41epoch:train:701-800batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=71.912, loss_att=54.658, acc=0.713, loss=59.834, backward_time=1.046, grad_norm=114.837, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.656e-05, 
train_time=2.928 +[gpub002:0/64] 2023-07-12 18:37:49,975 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:38:40,955 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-12 18:38:58,926 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 18:39:02,539 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:44:47,143 (trainer:732) INFO: 41epoch:train:801-900batch: iter_time=1.647, forward_time=0.146, loss_ctc=72.599, loss_att=54.115, acc=0.703, loss=59.660, backward_time=1.040, grad_norm=128.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=8.395 +[gpub002:0/64] 2023-07-12 18:45:06,195 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:47:03,912 (trainer:732) INFO: 41epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.145, loss_ctc=70.940, loss_att=54.535, acc=0.709, loss=59.457, backward_time=1.029, grad_norm=120.295, clip=100.000, loss_scale=1.821e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:49:19,533 (trainer:732) INFO: 41epoch:train:1001-1100batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=61.609, loss_att=44.306, acc=0.724, loss=49.497, backward_time=1.027, grad_norm=111.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.654e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 18:51:36,281 (trainer:732) INFO: 41epoch:train:1101-1200batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=73.032, loss_att=63.628, acc=0.705, loss=66.449, backward_time=1.034, grad_norm=164.160, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.653e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:53:52,356 (trainer:732) INFO: 41epoch:train:1201-1300batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=68.936, loss_att=48.977, acc=0.729, loss=54.965, backward_time=1.030, grad_norm=118.209, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 18:56:08,340 (trainer:732) INFO: 41epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=72.457, loss_att=57.311, acc=0.711, loss=61.854, backward_time=1.030, grad_norm=123.795, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 18:58:24,327 (trainer:732) INFO: 41epoch:train:1401-1500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.363, acc=0.719, loss=58.121, backward_time=1.030, grad_norm=100.726, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.651e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 19:00:40,397 (trainer:732) INFO: 41epoch:train:1501-1600batch: iter_time=1.334e-04, forward_time=0.146, loss_ctc=71.626, loss_att=57.110, acc=0.711, loss=61.465, backward_time=1.031, grad_norm=128.152, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.650e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:02:11,569 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-12 19:02:29,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-12 19:02:33,289 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:08:06,638 (trainer:732) INFO: 41epoch:train:1601-1700batch: iter_time=1.676, forward_time=0.169, loss_ctc=70.421, loss_att=49.398, acc=0.710, loss=55.705, backward_time=1.041, grad_norm=138.098, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.650e-05, train_time=8.923 +[gpub002:0/64] 2023-07-12 19:10:23,384 (trainer:732) INFO: 41epoch:train:1701-1800batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=69.158, loss_att=55.267, acc=0.709, loss=59.434, backward_time=1.031, grad_norm=104.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.649e-05, train_time=2.736 +[gpub002:0/64] 2023-07-12 19:12:38,917 (trainer:732) INFO: 41epoch:train:1801-1900batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=66.374, loss_att=50.597, acc=0.699, loss=55.330, backward_time=1.027, grad_norm=112.628, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.648e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 19:14:54,622 (trainer:732) INFO: 41epoch:train:1901-2000batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=68.154, loss_att=57.325, acc=0.709, loss=60.574, backward_time=1.027, grad_norm=103.678, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:17:10,478 (trainer:732) INFO: 41epoch:train:2001-2100batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=72.381, loss_att=54.045, acc=0.729, loss=59.546, backward_time=1.027, grad_norm=114.921, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 19:19:26,375 (trainer:732) INFO: 41epoch:train:2101-2200batch: iter_time=1.268e-04, forward_time=0.145, loss_ctc=68.777, loss_att=54.166, acc=0.702, loss=58.549, backward_time=1.028, grad_norm=113.943, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.646e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:21:42,063 (trainer:732) INFO: 41epoch:train:2201-2300batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=63.800, 
loss_att=49.285, acc=0.717, loss=53.640, backward_time=1.027, grad_norm=146.299, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.645e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:23:57,840 (trainer:732) INFO: 41epoch:train:2301-2400batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=74.630, loss_att=56.323, acc=0.712, loss=61.815, backward_time=1.028, grad_norm=111.859, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=2.715 +[gpub002:0/64] 2023-07-12 19:26:19,254 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-12 19:26:37,400 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 19:26:40,814 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:29:54,444 (trainer:732) INFO: 41epoch:train:2401-2500batch: iter_time=2.164, forward_time=0.165, loss_ctc=73.293, loss_att=57.191, acc=0.704, loss=62.022, backward_time=1.035, grad_norm=154.823, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=7.132 +[gpub002:0/64] 2023-07-12 19:32:11,981 (trainer:732) INFO: 41epoch:train:2501-2600batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=70.299, loss_att=55.298, acc=0.703, loss=59.798, backward_time=1.037, grad_norm=109.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.643e-05, train_time=2.751 +[gpub002:0/64] 2023-07-12 19:34:27,583 (trainer:732) INFO: 41epoch:train:2601-2700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=65.933, loss_att=48.206, acc=0.704, loss=53.524, backward_time=1.028, grad_norm=91.627, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 19:36:43,388 (trainer:732) INFO: 41epoch:train:2701-2800batch: iter_time=1.208e-04, forward_time=0.147, loss_ctc=64.547, loss_att=51.693, acc=0.716, loss=55.549, backward_time=1.027, grad_norm=123.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 19:38:59,319 (trainer:732) INFO: 41epoch:train:2801-2900batch: iter_time=1.147e-04, forward_time=0.145, loss_ctc=74.813, loss_att=58.932, acc=0.715, loss=63.696, backward_time=1.030, grad_norm=122.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.641e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:41:15,045 (trainer:732) INFO: 41epoch:train:2901-3000batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=61.440, loss_att=45.559, acc=0.725, loss=50.324, backward_time=1.028, grad_norm=124.359, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.640e-05, train_time=2.714 +[gpub002:0/64] 
2023-07-12 19:43:31,121 (trainer:732) INFO: 41epoch:train:3001-3100batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=73.113, loss_att=58.408, acc=0.712, loss=62.819, backward_time=1.028, grad_norm=132.870, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:45:49,866 (trainer:732) INFO: 41epoch:train:3101-3200batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=70.690, loss_att=51.235, acc=0.716, loss=57.071, backward_time=1.033, grad_norm=113.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.775 +[gpub002:0/64] 2023-07-12 19:48:12,230 (trainer:732) INFO: 41epoch:train:3201-3300batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=72.441, loss_att=57.254, acc=0.711, loss=61.810, backward_time=1.033, grad_norm=107.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.638e-05, train_time=2.847 +[gpub002:0/64] 2023-07-12 19:49:02,365 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-12 19:49:20,319 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 19:49:23,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:54:17,214 (trainer:732) INFO: 41epoch:train:3301-3400batch: iter_time=1.844, forward_time=0.147, loss_ctc=67.576, loss_att=49.521, acc=0.713, loss=54.938, backward_time=1.043, grad_norm=133.347, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=7.299 +[gpub002:0/64] 2023-07-12 19:56:33,870 (trainer:732) INFO: 41epoch:train:3401-3500batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=70.531, loss_att=54.713, acc=0.713, loss=59.458, backward_time=1.030, grad_norm=130.288, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 19:58:49,517 (trainer:732) INFO: 41epoch:train:3501-3600batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=60.897, loss_att=42.699, acc=0.730, loss=48.158, backward_time=1.028, grad_norm=137.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.636e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 20:01:05,892 (trainer:732) INFO: 41epoch:train:3601-3700batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=74.026, loss_att=64.166, acc=0.709, loss=67.124, backward_time=1.032, grad_norm=127.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.635e-05, train_time=2.727 +[gpub002:0/64] 2023-07-12 20:03:21,618 (trainer:732) INFO: 41epoch:train:3701-3800batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=67.233, loss_att=48.554, acc=0.733, loss=54.158, backward_time=1.028, 
grad_norm=114.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:05:37,755 (trainer:732) INFO: 41epoch:train:3801-3900batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=70.919, loss_att=56.270, acc=0.718, loss=60.665, backward_time=1.031, grad_norm=122.264, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:07:53,980 (trainer:732) INFO: 41epoch:train:3901-4000batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=69.754, loss_att=53.402, acc=0.720, loss=58.308, backward_time=1.032, grad_norm=108.612, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.633e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 20:10:10,380 (trainer:732) INFO: 41epoch:train:4001-4100batch: iter_time=1.024e-04, forward_time=0.147, loss_ctc=70.275, loss_att=56.231, acc=0.716, loss=60.444, backward_time=1.032, grad_norm=117.778, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:11:39,912 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 20:11:58,290 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 20:12:01,698 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:17:26,480 (trainer:732) INFO: 41epoch:train:4101-4200batch: iter_time=1.566, forward_time=0.146, loss_ctc=72.772, loss_att=55.012, acc=0.708, loss=60.340, backward_time=1.042, grad_norm=117.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=8.722 +[gpub002:0/64] 2023-07-12 20:19:42,895 (trainer:732) INFO: 41epoch:train:4201-4300batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=67.515, loss_att=51.939, acc=0.704, loss=56.612, backward_time=1.030, grad_norm=123.653, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.631e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:21:58,413 (trainer:732) INFO: 41epoch:train:4301-4400batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=61.290, loss_att=47.503, acc=0.716, loss=51.639, backward_time=1.025, grad_norm=112.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.630e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 20:24:14,368 (trainer:732) INFO: 41epoch:train:4401-4500batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=71.825, loss_att=62.339, acc=0.697, loss=65.185, backward_time=1.029, grad_norm=117.005, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:26:30,216 (trainer:732) INFO: 
41epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.145, loss_ctc=64.044, loss_att=44.250, acc=0.731, loss=50.189, backward_time=1.028, grad_norm=95.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:28:46,011 (trainer:732) INFO: 41epoch:train:4601-4700batch: iter_time=1.354e-04, forward_time=0.144, loss_ctc=72.970, loss_att=56.752, acc=0.705, loss=61.617, backward_time=1.029, grad_norm=111.368, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.628e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 20:31:02,650 (trainer:732) INFO: 41epoch:train:4701-4800batch: iter_time=1.450e-04, forward_time=0.144, loss_ctc=71.101, loss_att=53.521, acc=0.717, loss=58.795, backward_time=1.029, grad_norm=100.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.627e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 20:33:18,834 (trainer:732) INFO: 41epoch:train:4801-4900batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.523, loss_att=58.663, acc=0.707, loss=62.821, backward_time=1.031, grad_norm=127.161, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.627e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:35:36,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 20:35:54,833 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-12 20:35:58,246 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:41:04,776 (trainer:732) INFO: 41epoch:train:4901-5000batch: iter_time=1.597, forward_time=0.146, loss_ctc=69.320, loss_att=46.915, acc=0.725, loss=53.637, backward_time=1.038, grad_norm=113.892, clip=100.000, loss_scale=3.018e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=9.319 +[gpub002:0/64] 2023-07-12 20:43:22,964 (trainer:732) INFO: 41epoch:train:5001-5100batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=68.557, loss_att=53.531, acc=0.716, loss=58.039, backward_time=1.036, grad_norm=109.852, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.625e-05, train_time=2.764 +[gpub002:0/64] 2023-07-12 20:45:38,905 (trainer:732) INFO: 41epoch:train:5101-5200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=65.544, loss_att=48.308, acc=0.716, loss=53.479, backward_time=1.029, grad_norm=123.663, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:47:55,428 (trainer:732) INFO: 41epoch:train:5201-5300batch: iter_time=9.594e-05, forward_time=0.147, loss_ctc=69.697, loss_att=59.000, acc=0.717, loss=62.209, backward_time=1.031, grad_norm=109.695, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.730 +[gpub002:0/64] 2023-07-12 20:50:11,373 (trainer:732) INFO: 41epoch:train:5301-5400batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=71.695, loss_att=51.848, acc=0.732, loss=57.802, backward_time=1.029, grad_norm=105.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.623e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:52:26,937 (trainer:732) INFO: 41epoch:train:5401-5500batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=69.713, loss_att=56.136, acc=0.698, loss=60.209, backward_time=1.026, grad_norm=124.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 20:54:42,808 (trainer:732) INFO: 41epoch:train:5501-5600batch: iter_time=1.183e-04, forward_time=0.145, loss_ctc=65.673, loss_att=49.982, acc=0.728, loss=54.690, backward_time=1.028, grad_norm=121.869, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:56:58,543 (trainer:732) INFO: 41epoch:train:5601-5700batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=72.242, loss_att=52.803, acc=0.723, loss=58.635, backward_time=1.027, grad_norm=111.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.621e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:59:14,728 (trainer:732) INFO: 41epoch:train:5701-5800batch: iter_time=9.577e-05, forward_time=0.145, loss_ctc=69.925, loss_att=54.850, acc=0.714, loss=59.373, backward_time=1.031, grad_norm=113.233, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.620e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:00:01,150 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
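The recurring "[train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129" lines in the iter-factory blocks are consistent with a sampler that fixes the batch count at len(keys) // batch_size and spreads the remainder across the batches, so a handful of batches carry 129 utterances instead of 128. A minimal sketch of that arithmetic, assuming this simple scheme (not the actual espnet2 UnsortedBatchSampler, whose details may differ):

```python
def make_batches(keys, batch_size=128):
    # fix the number of batches, then spread the remainder over them
    n_batch = max(len(keys) // batch_size, 1)
    base, extra = divmod(len(keys), n_batch)
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)   # first `extra` batches get +1
        batches.append(keys[start:start + size])
        start += size
    return batches

# hypothetical key count: 20 utterances more than an exact multiple of 128
sizes = [len(b) for b in make_batches(range(37994 * 128 + 20))]
print(len(sizes), min(sizes), max(sizes))   # -> 37994 128 129
```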
+[gpub002:0/64] 2023-07-12 21:00:19,189 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-12 21:00:22,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:05:30,215 (trainer:732) INFO: 41epoch:train:5801-5900batch: iter_time=1.645, forward_time=0.193, loss_ctc=72.625, loss_att=53.896, acc=0.723, loss=59.515, backward_time=1.042, grad_norm=126.902, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.619e-05, train_time=7.509 +[gpub002:0/64] 2023-07-12 21:07:46,821 (trainer:732) INFO: 41epoch:train:5901-6000batch: iter_time=1.429e-04, forward_time=0.147, loss_ctc=66.989, loss_att=49.786, acc=0.709, loss=54.947, backward_time=1.029, grad_norm=130.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.619e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 21:10:03,539 (trainer:732) INFO: 41epoch:train:6001-6100batch: iter_time=1.134e-04, forward_time=0.149, loss_ctc=64.189, loss_att=51.518, acc=0.726, loss=55.319, backward_time=1.031, grad_norm=103.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.618e-05, train_time=2.734 +[gpub002:0/64] 2023-07-12 21:12:19,396 (trainer:732) INFO: 41epoch:train:6101-6200batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=72.559, loss_att=55.954, acc=0.724, loss=60.936, backward_time=1.029, grad_norm=122.019, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 21:14:35,307 (trainer:732) INFO: 41epoch:train:6201-6300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=64.191, loss_att=50.498, acc=0.713, loss=54.606, backward_time=1.028, grad_norm=114.048, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:15:07,857 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 21:16:51,122 (trainer:732) INFO: 41epoch:train:6301-6400batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=70.103, loss_att=54.476, acc=0.732, loss=59.164, backward_time=1.030, grad_norm=103.656, clip=100.000, loss_scale=1.987e+32, optim_step_time=0.183, optim0_lr0=5.616e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:17:42,514 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
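The two "grad norm is nan" warnings above, and the drop of the reported loss_scale from 3.245e+32 to 8.113e+31 over the following windows (the printed value is a 100-batch window average, so each halving shows up gradually), follow the usual mixed-precision recipe: skip the optimizer step when gradients go non-finite and halve the dynamic loss scale. A minimal sketch of that recipe with torch.cuda.amp.GradScaler; ESPnet's trainer differs in detail:

```python
import torch

model = torch.nn.Linear(80, 4)                     # stand-in model
optim = torch.optim.AdamW(model.parameters(), lr=5.6e-5)
scaler = torch.cuda.amp.GradScaler()               # owns the dynamic loss_scale

def train_step(x, y):
    optim.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    scaler.scale(loss).backward()                  # backward through the scaled loss
    scaler.unscale_(optim)                         # unscale so clipping sees true grads
    # clip_grad_norm_ returns the pre-clip total norm -- the grad_norm figure
    # in the log -- clipped against the clip=100.000 threshold
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        print("WARNING: The grad norm is nan. Skipping updating the model.")
    scaler.step(optim)   # skips the update internally when grads are inf/nan
    scaler.update()      # halves the loss scale after a skipped step
    return grad_norm

train_step(torch.randn(8, 80), torch.randn(8, 4))
```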
+[gpub002:0/64] 2023-07-12 21:19:06,809 (trainer:732) INFO: 41epoch:train:6401-6500batch: iter_time=1.398e-04, forward_time=0.146, loss_ctc=70.713, loss_att=52.907, acc=0.719, loss=58.249, backward_time=1.028, grad_norm=129.552, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 21:21:23,047 (trainer:732) INFO: 41epoch:train:6501-6600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=71.542, loss_att=55.074, acc=0.720, loss=60.015, backward_time=1.031, grad_norm=119.497, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 21:23:04,288 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-12 21:23:22,599 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-12 21:23:26,053 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:28:01,974 (trainer:732) INFO: 41epoch:train:6601-6700batch: iter_time=2.543, forward_time=0.146, loss_ctc=71.485, loss_att=51.860, acc=0.717, loss=57.748, backward_time=1.039, grad_norm=121.242, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.614e-05, train_time=7.978 +[gpub002:0/64] 2023-07-12 21:30:19,098 (trainer:732) INFO: 41epoch:train:6701-6800batch: iter_time=1.407e-04, forward_time=0.145, loss_ctc=67.783, loss_att=52.647, acc=0.706, loss=57.188, backward_time=1.030, grad_norm=102.934, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.613e-05, train_time=2.742 +[gpub002:0/64] 2023-07-12 21:32:35,143 (trainer:732) INFO: 41epoch:train:6801-6900batch: iter_time=1.370e-04, forward_time=0.149, loss_ctc=60.432, loss_att=44.809, acc=0.723, loss=49.496, backward_time=1.030, grad_norm=143.111, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 21:34:51,046 (trainer:732) INFO: 41epoch:train:6901-7000batch: iter_time=1.716e-04, forward_time=0.147, loss_ctc=73.656, loss_att=65.329, acc=0.698, loss=67.827, backward_time=1.029, grad_norm=127.853, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:37:06,844 (trainer:732) INFO: 41epoch:train:7001-7100batch: iter_time=1.519e-04, forward_time=0.147, loss_ctc=63.569, loss_att=45.390, acc=0.737, loss=50.844, backward_time=1.028, grad_norm=115.348, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.611e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:39:22,832 (trainer:732) INFO: 41epoch:train:7101-7200batch: iter_time=1.483e-04, forward_time=0.147, loss_ctc=73.348, loss_att=57.141, acc=0.707, loss=62.003, 
backward_time=1.030, grad_norm=125.156, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.610e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 21:41:38,634 (trainer:732) INFO: 41epoch:train:7201-7300batch: iter_time=1.409e-04, forward_time=0.145, loss_ctc=68.433, loss_att=52.831, acc=0.715, loss=57.512, backward_time=1.026, grad_norm=117.453, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.610e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:43:54,806 (trainer:732) INFO: 41epoch:train:7301-7400batch: iter_time=1.454e-04, forward_time=0.148, loss_ctc=69.535, loss_att=57.942, acc=0.709, loss=61.420, backward_time=1.031, grad_norm=142.520, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.609e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:46:12,782 (trainer:732) INFO: 41epoch:train:7401-7500batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=67.374, loss_att=45.684, acc=0.729, loss=52.191, backward_time=1.031, grad_norm=135.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.608e-05, train_time=2.759 +[gpub002:0/64] 2023-07-12 21:46:15,854 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 21:46:34,163 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 21:46:37,588 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:54:09,248 (trainer:732) INFO: 41epoch:train:7501-7600batch: iter_time=1.603, forward_time=0.145, loss_ctc=71.396, loss_att=58.603, acc=0.700, loss=62.441, backward_time=1.042, grad_norm=141.498, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=9.529 +[gpub002:0/64] 2023-07-12 21:56:25,789 (trainer:732) INFO: 41epoch:train:7601-7700batch: iter_time=1.471e-04, forward_time=0.144, loss_ctc=66.114, loss_att=48.625, acc=0.707, loss=53.872, backward_time=1.029, grad_norm=119.762, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=2.731 +[gpub002:0/64] 2023-07-12 21:58:41,647 (trainer:732) INFO: 41epoch:train:7701-7800batch: iter_time=1.349e-04, forward_time=0.145, loss_ctc=63.466, loss_att=50.427, acc=0.716, loss=54.339, backward_time=1.028, grad_norm=113.922, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.606e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:00:59,471 (trainer:732) INFO: 41epoch:train:7801-7900batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=72.326, loss_att=56.756, acc=0.720, loss=61.427, backward_time=1.031, grad_norm=109.457, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.756 +[gpub002:0/64] 2023-07-12 22:03:19,497 (trainer:732) INFO: 
41epoch:train:7901-8000batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=60.819, loss_att=44.765, acc=0.722, loss=49.581, backward_time=1.033, grad_norm=106.824, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.800 +[gpub002:0/64] 2023-07-12 22:05:35,694 (trainer:732) INFO: 41epoch:train:8001-8100batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=75.424, loss_att=59.810, acc=0.714, loss=64.495, backward_time=1.030, grad_norm=123.469, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.604e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:07:51,699 (trainer:732) INFO: 41epoch:train:8101-8200batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=69.288, loss_att=51.179, acc=0.716, loss=56.612, backward_time=1.029, grad_norm=117.088, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 22:10:07,627 (trainer:732) INFO: 41epoch:train:8201-8300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=72.391, loss_att=55.439, acc=0.714, loss=60.525, backward_time=1.029, grad_norm=127.931, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:10:55,464 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 22:11:13,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 22:11:17,500 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:18:38,342 (trainer:732) INFO: 41epoch:train:8301-8400batch: iter_time=1.615, forward_time=0.147, loss_ctc=67.249, loss_att=51.366, acc=0.704, loss=56.131, backward_time=1.046, grad_norm=129.432, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.602e-05, train_time=10.214 +[gpub002:0/64] 2023-07-12 22:20:55,423 (trainer:732) INFO: 41epoch:train:8401-8500batch: iter_time=9.569e-05, forward_time=0.145, loss_ctc=68.689, loss_att=52.556, acc=0.712, loss=57.396, backward_time=1.031, grad_norm=129.356, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.601e-05, train_time=2.741 +[gpub002:0/64] 2023-07-12 22:23:11,495 (trainer:732) INFO: 41epoch:train:8501-8600batch: iter_time=9.187e-05, forward_time=0.146, loss_ctc=62.059, loss_att=44.195, acc=0.721, loss=49.554, backward_time=1.028, grad_norm=111.154, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 22:25:27,686 (trainer:732) INFO: 41epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=71.896, loss_att=62.165, acc=0.700, loss=65.084, backward_time=1.030, grad_norm=133.087, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:27:43,333 (trainer:732) INFO: 41epoch:train:8701-8800batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=68.306, loss_att=48.020, acc=0.733, loss=54.106, backward_time=1.027, grad_norm=109.698, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.599e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:29:58,990 (trainer:732) INFO: 41epoch:train:8801-8900batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=70.034, loss_att=54.974, acc=0.709, loss=59.492, backward_time=1.026, grad_norm=125.573, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:32:14,802 (trainer:732) INFO: 41epoch:train:8901-9000batch: iter_time=9.824e-05, forward_time=0.146, loss_ctc=70.258, loss_att=54.489, acc=0.715, loss=59.220, backward_time=1.028, grad_norm=114.409, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 22:34:31,075 (trainer:732) INFO: 41epoch:train:9001-9100batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=68.492, loss_att=54.068, acc=0.716, loss=58.395, backward_time=1.029, grad_norm=140.544, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.597e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 22:36:03,875 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 22:36:22,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-12 22:36:26,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:42:43,033 (trainer:732) INFO: 41epoch:train:9101-9200batch: iter_time=1.881, forward_time=0.184, loss_ctc=68.469, loss_att=50.264, acc=0.718, loss=55.726, backward_time=1.044, grad_norm=106.621, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.596e-05, train_time=9.838 +[gpub002:0/64] 2023-07-12 22:45:00,788 (trainer:732) INFO: 41epoch:train:9201-9300batch: iter_time=1.293e-04, forward_time=0.148, loss_ctc=68.471, loss_att=53.732, acc=0.720, loss=58.153, backward_time=1.035, grad_norm=122.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.596e-05, train_time=2.755 +[gpub002:0/64] 2023-07-12 22:47:19,808 (trainer:732) INFO: 41epoch:train:9301-9400batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=65.914, loss_att=48.978, acc=0.714, loss=54.059, backward_time=1.037, grad_norm=109.971, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.595e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 22:49:36,096 (trainer:732) INFO: 41epoch:train:9401-9500batch: iter_time=1.167e-04, 
forward_time=0.146, loss_ctc=68.014, loss_att=54.562, acc=0.726, loss=58.597, backward_time=1.029, grad_norm=117.199, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.594e-05, train_time=2.726 +[gpub002:0/64] 2023-07-12 22:51:51,997 (trainer:732) INFO: 41epoch:train:9501-9600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=71.249, loss_att=53.735, acc=0.733, loss=58.989, backward_time=1.029, grad_norm=138.178, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:54:07,882 (trainer:732) INFO: 41epoch:train:9601-9700batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=66.076, loss_att=52.907, acc=0.713, loss=56.858, backward_time=1.028, grad_norm=106.233, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:56:23,823 (trainer:732) INFO: 41epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=62.985, loss_att=48.457, acc=0.729, loss=52.816, backward_time=1.029, grad_norm=112.368, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.592e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 22:58:40,560 (trainer:732) INFO: 41epoch:train:9801-9900batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=74.657, loss_att=55.990, acc=0.719, loss=61.590, backward_time=1.027, grad_norm=113.466, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 23:00:56,830 (trainer:732) INFO: 41epoch:train:9901-10000batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=72.076, loss_att=56.216, acc=0.714, loss=60.974, backward_time=1.030, grad_norm=131.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 23:18:11,678 (trainer:338) INFO: 41epoch results: [train] iter_time=0.233, forward_time=0.147, loss_ctc=69.121, loss_att=53.257, acc=0.715, loss=58.016, backward_time=1.032, grad_norm=120.506, clip=100.000, loss_scale=1.781e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=3.453, time=4 hours, 48 minutes and 7.1 seconds, total_count=380000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.125, cer_ctc=0.260, loss_att=39.542, acc=0.666, cer=0.430, wer=0.999, loss=40.917, time=8 minutes and 29.09 seconds, total_count=38962, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 26.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 23:18:30,669 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/36epoch.pth +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:272) INFO: 42/50epoch started. Estimated time to finish: 1 day, 20 hours and 54 minutes +[gpub002:0/64] 2023-07-12 23:18:31,369 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
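A quick consistency check on the 41epoch results line above: the reported loss is the weighted hybrid CTC/attention combination of loss_ctc and loss_att, and back-solving the logged triplets gives a CTC weight of 0.3 (assumed here rather than read from the training config):

```python
CTC_WEIGHT = 0.3   # assumption, back-solved from the logged triplets

def combined(loss_ctc: float, loss_att: float) -> float:
    return CTC_WEIGHT * loss_ctc + (1.0 - CTC_WEIGHT) * loss_att

# [train] averages from the 41epoch results line:
print(f"{combined(69.121, 53.257):.3f}")   # 58.016, as logged
# [valid] averages from the same line:
print(f"{combined(44.125, 39.542):.3f}")   # 40.917, as logged
```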
+[gpub002:0/64] 2023-07-12 23:18:49,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 23:18:55,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 23:37:37,107 (trainer:732) INFO: 42epoch:train:1-100batch: iter_time=8.724, forward_time=1.113, loss_ctc=71.175, loss_att=49.430, acc=0.718, loss=55.953, backward_time=1.227, grad_norm=114.213, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=5.590e-05, train_time=22.913 +[gpub002:0/64] 2023-07-12 23:42:11,727 (trainer:732) INFO: 42epoch:train:101-200batch: iter_time=0.006, forward_time=0.982, loss_ctc=69.942, loss_att=54.374, acc=0.696, loss=59.044, backward_time=1.246, grad_norm=121.482, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.288, optim0_lr0=5.589e-05, train_time=5.491 +[gpub002:0/64] 2023-07-12 23:47:05,839 (trainer:732) INFO: 42epoch:train:201-300batch: iter_time=0.017, forward_time=1.206, loss_ctc=61.801, loss_att=45.949, acc=0.719, loss=50.705, backward_time=1.243, grad_norm=123.757, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.240, optim0_lr0=5.589e-05, train_time=5.881 +[gpub002:0/64] 2023-07-12 23:50:49,717 (trainer:732) INFO: 42epoch:train:301-400batch: iter_time=0.004, forward_time=0.743, loss_ctc=73.412, loss_att=51.589, acc=0.721, loss=58.136, backward_time=1.151, grad_norm=125.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.222, optim0_lr0=5.588e-05, train_time=4.480 +[gpub002:0/64] 2023-07-12 23:54:19,195 (trainer:732) INFO: 42epoch:train:401-500batch: iter_time=0.012, forward_time=0.551, loss_ctc=82.982, loss_att=60.880, acc=0.688, loss=67.511, backward_time=1.134, grad_norm=124.069, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.209, optim0_lr0=5.587e-05, train_time=4.189 +[gpub002:0/64] 2023-07-12 23:57:28,757 (trainer:732) INFO: 42epoch:train:501-600batch: iter_time=0.002, forward_time=0.518, loss_ctc=78.082, loss_att=61.663, acc=0.708, loss=66.589, backward_time=1.094, grad_norm=124.871, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.201, optim0_lr0=5.586e-05, train_time=3.791 +[gpub002:0/64] 2023-07-13 00:00:25,025 (trainer:732) INFO: 42epoch:train:601-700batch: iter_time=6.864e-04, forward_time=0.420, loss_ctc=79.441, loss_att=60.935, acc=0.691, loss=66.487, backward_time=1.078, grad_norm=128.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.199, optim0_lr0=5.586e-05, train_time=3.526 +[gpub002:0/64] 2023-07-13 00:03:02,236 (trainer:732) INFO: 42epoch:train:701-800batch: iter_time=4.233e-04, forward_time=0.282, loss_ctc=72.996, loss_att=53.014, acc=0.712, loss=59.009, backward_time=1.066, grad_norm=122.990, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.197, optim0_lr0=5.585e-05, train_time=3.144 
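The "42/50epoch started. Estimated time to finish: 1 day, 20 hours and 54 minutes" message above is roughly the remaining epoch count times the per-epoch wall time. Redoing that arithmetic from the epoch-41 timings alone lands near the logged figure; the trainer presumably averages over more history, so treat this as an approximation only:

```python
from datetime import timedelta

# epoch-41 wall time, summed from the results line: [train] + [valid] + [att_plot]
epoch = (timedelta(hours=4, minutes=48, seconds=7.1)
         + timedelta(minutes=8, seconds=29.09)
         + timedelta(minutes=8, seconds=26.85))
remaining = 50 - 41
print(epoch * remaining)   # 1 day, 21:45:27.360000 -- close to the logged estimate
```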
+[gpub002:0/64] 2023-07-13 00:04:00,778 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 00:04:18,719 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 00:04:22,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:09:56,081 (trainer:732) INFO: 42epoch:train:801-900batch: iter_time=2.590, forward_time=0.185, loss_ctc=73.051, loss_att=54.592, acc=0.715, loss=60.130, backward_time=1.048, grad_norm=128.053, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.584e-05, train_time=8.279 +[gpub002:0/64] 2023-07-13 00:12:13,867 (trainer:732) INFO: 42epoch:train:901-1000batch: iter_time=1.283e-04, forward_time=0.148, loss_ctc=66.524, loss_att=49.934, acc=0.708, loss=54.911, backward_time=1.035, grad_norm=121.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.584e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 00:14:30,359 (trainer:732) INFO: 42epoch:train:1001-1100batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=61.394, loss_att=46.990, acc=0.713, loss=51.311, backward_time=1.031, grad_norm=98.338, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.583e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 00:16:46,091 (trainer:732) INFO: 42epoch:train:1101-1200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=68.116, loss_att=48.127, acc=0.735, loss=54.124, backward_time=1.028, grad_norm=118.499, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 00:19:03,313 (trainer:732) INFO: 42epoch:train:1201-1300batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=78.528, loss_att=57.137, acc=0.710, loss=63.554, backward_time=1.036, grad_norm=124.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.744 +[gpub002:0/64] 2023-07-13 00:21:20,377 (trainer:732) INFO: 42epoch:train:1301-1400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=79.593, loss_att=62.972, acc=0.704, loss=67.958, backward_time=1.031, grad_norm=134.092, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.581e-05, train_time=2.741 +[gpub002:0/64] 2023-07-13 00:23:54,097 (trainer:732) INFO: 42epoch:train:1401-1500batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=74.007, loss_att=54.221, acc=0.718, loss=60.157, backward_time=1.048, grad_norm=127.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.580e-05, train_time=3.074 +[gpub002:0/64] 2023-07-13 00:26:09,909 (trainer:732) INFO: 42epoch:train:1501-1600batch: iter_time=1.349e-04, forward_time=0.144, loss_ctc=75.275, loss_att=58.574, acc=0.702, loss=63.584, 
backward_time=1.029, grad_norm=121.251, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.579e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 00:27:59,966 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 00:28:18,102 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:28:22,132 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:28:22,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 00:28:22,317 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:35:20,703 (trainer:732) INFO: 42epoch:train:1601-1700batch: iter_time=4.060, forward_time=0.198, loss_ctc=75.398, loss_att=54.018, acc=0.726, loss=60.432, backward_time=1.041, grad_norm=136.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.579e-05, train_time=11.016 +[gpub002:0/64] 2023-07-13 00:36:32,130 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 00:37:37,330 (trainer:732) INFO: 42epoch:train:1701-1800batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=64.814, loss_att=48.109, acc=0.703, loss=53.121, backward_time=1.033, grad_norm=135.699, clip=100.000, loss_scale=1.225e+32, optim_step_time=0.182, optim0_lr0=5.578e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 00:39:53,639 (trainer:732) INFO: 42epoch:train:1801-1900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=62.019, loss_att=46.732, acc=0.717, loss=51.318, backward_time=1.032, grad_norm=99.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 00:42:09,855 (trainer:732) INFO: 42epoch:train:1901-2000batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.823, loss_att=48.017, acc=0.738, loss=54.259, backward_time=1.028, grad_norm=118.420, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:44:25,673 (trainer:732) INFO: 42epoch:train:2001-2100batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=81.313, loss_att=58.446, acc=0.706, loss=65.306, backward_time=1.028, grad_norm=134.672, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.576e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 00:46:41,649 (trainer:732) INFO: 42epoch:train:2101-2200batch: iter_time=1.066e-04, forward_time=0.146, loss_ctc=78.344, loss_att=62.937, acc=0.702, loss=67.559, backward_time=1.030, grad_norm=151.716, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 00:48:57,917 (trainer:732) INFO: 42epoch:train:2201-2300batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=75.719, loss_att=57.616, acc=0.712, loss=63.047, backward_time=1.032, grad_norm=184.811, clip=100.000, loss_scale=8.113e+31, 
optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 00:51:28,156 (trainer:732) INFO: 42epoch:train:2301-2400batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=73.652, loss_att=53.750, acc=0.713, loss=59.720, backward_time=1.049, grad_norm=115.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.574e-05, train_time=3.005 +[gpub002:0/64] 2023-07-13 00:53:44,309 (trainer:732) INFO: 42epoch:train:2401-2500batch: iter_time=1.099e-04, forward_time=0.146, loss_ctc=70.966, loss_att=49.355, acc=0.718, loss=55.839, backward_time=1.028, grad_norm=106.942, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:53:46,681 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 00:54:04,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 00:54:08,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:01:39,737 (trainer:732) INFO: 42epoch:train:2501-2600batch: iter_time=1.820, forward_time=0.145, loss_ctc=69.815, loss_att=48.599, acc=0.726, loss=54.963, backward_time=1.041, grad_norm=140.834, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=9.508 +[gpub002:0/64] 2023-07-13 01:03:56,212 (trainer:732) INFO: 42epoch:train:2601-2700batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=67.653, loss_att=52.431, acc=0.712, loss=56.997, backward_time=1.031, grad_norm=109.062, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.572e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 01:06:11,899 (trainer:732) INFO: 42epoch:train:2701-2800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=59.663, loss_att=44.027, acc=0.729, loss=48.718, backward_time=1.028, grad_norm=114.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.571e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:08:27,521 (trainer:732) INFO: 42epoch:train:2801-2900batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=71.051, loss_att=47.872, acc=0.736, loss=54.826, backward_time=1.028, grad_norm=161.477, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 01:10:43,390 (trainer:732) INFO: 42epoch:train:2901-3000batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=80.680, loss_att=59.876, acc=0.698, loss=66.117, backward_time=1.029, grad_norm=125.354, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 01:12:59,481 (trainer:732) INFO: 42epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.146, 
loss_ctc=74.273, loss_att=57.896, acc=0.724, loss=62.809, backward_time=1.030, grad_norm=158.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.569e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:15:15,668 (trainer:732) INFO: 42epoch:train:3101-3200batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=78.031, loss_att=59.327, acc=0.704, loss=64.938, backward_time=1.030, grad_norm=153.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.568e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 01:17:36,473 (trainer:732) INFO: 42epoch:train:3201-3300batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=73.496, loss_att=52.683, acc=0.721, loss=58.927, backward_time=1.028, grad_norm=135.257, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.568e-05, train_time=2.816 +[gpub002:0/64] 2023-07-13 01:18:26,019 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 01:18:44,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 01:18:47,937 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:25:11,697 (trainer:732) INFO: 42epoch:train:3301-3400batch: iter_time=1.638, forward_time=0.194, loss_ctc=72.538, loss_att=53.392, acc=0.723, loss=59.136, backward_time=1.050, grad_norm=121.024, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=5.567e-05, train_time=9.103 +[gpub002:0/64] 2023-07-13 01:27:27,626 (trainer:732) INFO: 42epoch:train:3401-3500batch: iter_time=1.464e-04, forward_time=0.146, loss_ctc=67.316, loss_att=49.889, acc=0.703, loss=55.117, backward_time=1.029, grad_norm=129.513, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:29:43,626 (trainer:732) INFO: 42epoch:train:3501-3600batch: iter_time=1.305e-04, forward_time=0.146, loss_ctc=61.606, loss_att=46.689, acc=0.718, loss=51.164, backward_time=1.029, grad_norm=108.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:31:59,663 (trainer:732) INFO: 42epoch:train:3601-3700batch: iter_time=1.609e-04, forward_time=0.146, loss_ctc=67.195, loss_att=48.227, acc=0.733, loss=53.917, backward_time=1.029, grad_norm=126.501, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.565e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 01:34:15,629 (trainer:732) INFO: 42epoch:train:3701-3800batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=76.534, loss_att=56.304, acc=0.712, loss=62.373, backward_time=1.030, grad_norm=117.496, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.719 
+[gpub002:0/64] 2023-07-13 01:36:31,874 (trainer:732) INFO: 42epoch:train:3801-3900batch: iter_time=1.527e-04, forward_time=0.147, loss_ctc=79.688, loss_att=61.818, acc=0.707, loss=67.179, backward_time=1.031, grad_norm=127.586, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 01:38:47,365 (trainer:732) INFO: 42epoch:train:3901-4000batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=73.126, loss_att=53.845, acc=0.710, loss=59.629, backward_time=1.026, grad_norm=113.224, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.563e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 01:41:03,491 (trainer:732) INFO: 42epoch:train:4001-4100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=73.877, loss_att=58.523, acc=0.696, loss=63.129, backward_time=1.030, grad_norm=130.725, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.562e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:42:49,251 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 01:43:07,433 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:43:10,890 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:43:10,891 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 01:43:10,897 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:48:33,517 (trainer:732) INFO: 42epoch:train:4101-4200batch: iter_time=2.988, forward_time=0.147, loss_ctc=74.164, loss_att=53.456, acc=0.719, loss=59.668, backward_time=1.039, grad_norm=115.908, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.561e-05, train_time=9.000 +[gpub002:0/64] 2023-07-13 01:50:50,772 (trainer:732) INFO: 42epoch:train:4201-4300batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.361, loss_att=46.601, acc=0.711, loss=51.929, backward_time=1.033, grad_norm=103.878, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.561e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 01:53:07,508 (trainer:732) INFO: 42epoch:train:4301-4400batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=61.857, loss_att=46.652, acc=0.722, loss=51.213, backward_time=1.030, grad_norm=101.562, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.560e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 01:55:23,236 (trainer:732) INFO: 42epoch:train:4401-4500batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=69.241, loss_att=48.461, acc=0.741, loss=54.695, backward_time=1.028, grad_norm=128.802, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:57:42,177 (trainer:732) INFO: 42epoch:train:4501-4600batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=80.204, loss_att=58.270, acc=0.708, loss=64.850, 
backward_time=1.034, grad_norm=105.180, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 01:59:58,237 (trainer:732) INFO: 42epoch:train:4601-4700batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=76.178, loss_att=61.031, acc=0.708, loss=65.575, backward_time=1.030, grad_norm=119.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.558e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:02:14,580 (trainer:732) INFO: 42epoch:train:4701-4800batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=75.844, loss_att=58.430, acc=0.715, loss=63.654, backward_time=1.034, grad_norm=126.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 02:04:30,380 (trainer:732) INFO: 42epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=73.955, loss_att=54.425, acc=0.710, loss=60.284, backward_time=1.028, grad_norm=110.702, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 02:06:46,081 (trainer:732) INFO: 42epoch:train:4901-5000batch: iter_time=1.107e-04, forward_time=0.146, loss_ctc=71.515, loss_att=48.756, acc=0.724, loss=55.584, backward_time=1.029, grad_norm=124.882, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.556e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:06:47,710 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 02:07:06,009 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 02:07:09,432 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:14:16,140 (trainer:732) INFO: 42epoch:train:5001-5100batch: iter_time=1.636, forward_time=0.145, loss_ctc=69.401, loss_att=50.532, acc=0.709, loss=56.193, backward_time=1.040, grad_norm=142.315, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=9.001 +[gpub002:0/64] 2023-07-13 02:16:32,200 (trainer:732) INFO: 42epoch:train:5101-5200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.757, loss_att=49.932, acc=0.709, loss=54.679, backward_time=1.028, grad_norm=114.490, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:18:52,290 (trainer:732) INFO: 42epoch:train:5201-5300batch: iter_time=1.255e-04, forward_time=0.144, loss_ctc=64.010, loss_att=44.270, acc=0.738, loss=50.192, backward_time=1.032, grad_norm=105.407, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.554e-05, train_time=2.802 +[gpub002:0/64] 2023-07-13 02:21:08,766 (trainer:732) INFO: 
42epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.481, loss_att=53.258, acc=0.712, loss=58.425, backward_time=1.033, grad_norm=117.124, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 02:23:24,679 (trainer:732) INFO: 42epoch:train:5401-5500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=79.697, loss_att=60.747, acc=0.706, loss=66.432, backward_time=1.030, grad_norm=129.480, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:25:41,523 (trainer:732) INFO: 42epoch:train:5501-5600batch: iter_time=1.228e-04, forward_time=0.149, loss_ctc=72.910, loss_att=55.688, acc=0.712, loss=60.854, backward_time=1.030, grad_norm=149.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.552e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 02:27:57,260 (trainer:732) INFO: 42epoch:train:5601-5700batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=77.018, loss_att=59.105, acc=0.700, loss=64.479, backward_time=1.028, grad_norm=131.998, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.551e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:30:13,284 (trainer:732) INFO: 42epoch:train:5701-5800batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=73.572, loss_att=53.577, acc=0.722, loss=59.576, backward_time=1.030, grad_norm=103.209, clip=100.000, loss_scale=1.201e+32, optim_step_time=0.182, optim0_lr0=5.551e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 02:31:01,533 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 02:31:19,805 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 02:31:23,533 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:38:09,529 (trainer:732) INFO: 42epoch:train:5801-5900batch: iter_time=1.672, forward_time=0.223, loss_ctc=66.753, loss_att=45.931, acc=0.710, loss=52.177, backward_time=1.044, grad_norm=115.552, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.550e-05, train_time=9.524 +[gpub002:0/64] 2023-07-13 02:40:27,216 (trainer:732) INFO: 42epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=66.955, loss_att=51.180, acc=0.714, loss=55.913, backward_time=1.032, grad_norm=103.524, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.549e-05, train_time=2.754 +[gpub002:0/64] 2023-07-13 02:42:42,739 (trainer:732) INFO: 42epoch:train:6001-6100batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=62.586, loss_att=45.365, acc=0.728, loss=50.531, backward_time=1.026, grad_norm=94.871, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:44:58,673 (trainer:732) INFO: 42epoch:train:6101-6200batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=70.502, loss_att=52.029, acc=0.726, loss=57.571, backward_time=1.029, grad_norm=120.129, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:47:14,390 (trainer:732) INFO: 42epoch:train:6201-6300batch: iter_time=1.266e-04, forward_time=0.144, loss_ctc=80.294, loss_att=60.866, acc=0.691, loss=66.694, backward_time=1.027, grad_norm=141.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.547e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:49:29,945 (trainer:732) INFO: 42epoch:train:6301-6400batch: iter_time=1.154e-04, forward_time=0.144, loss_ctc=74.026, loss_att=55.778, acc=0.718, loss=61.252, backward_time=1.026, grad_norm=131.830, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 02:51:45,921 (trainer:732) INFO: 42epoch:train:6401-6500batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=77.487, loss_att=57.948, acc=0.707, loss=63.810, backward_time=1.028, grad_norm=135.680, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 02:54:01,435 (trainer:732) INFO: 42epoch:train:6501-6600batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=69.023, loss_att=50.317, acc=0.715, loss=55.929, backward_time=1.026, grad_norm=121.503, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.545e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:55:35,752 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
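The "Building Nth iter-factory..." messages walk the 12 splits in a different order each epoch (epoch 41 visits split.7, 8, 9, 11, 10, 6, 2, 5; epoch 42 starts over with split.6, 5, 11, 1, 0, 9, 8, 10, 3, ...), i.e. one dataset-plus-sampler factory is built lazily per split from a per-epoch shuffled list. A minimal sketch of that pattern, assuming an epoch-seeded shuffle (espnet2's MultipleIterFactory differs in detail):

```python
import random

def build_iter_factories(num_splits=12, epoch=42):
    order = list(range(num_splits))
    random.Random(epoch).shuffle(order)   # deterministic per-epoch order
    for n, split in enumerate(order):
        # the real trainer builds an ESPnetDataset + UnsortedBatchSampler
        # over exp/s2t_stats_raw_bpe50000/splits12/*/split.{split} here
        print(f"Building {n}th iter-factory... (split.{split})")
        yield split

for _ in build_iter_factories():
    pass
```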
+[gpub002:0/64] 2023-07-13 02:55:53,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-13 02:55:57,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 03:02:27,111 (trainer:732) INFO: 42epoch:train:6601-6700batch: iter_time=3.598, forward_time=0.208, loss_ctc=68.057, loss_att=47.036, acc=0.718, loss=53.342, backward_time=1.042, grad_norm=118.857, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.544e-05, train_time=10.113
+[gpub002:0/64] 2023-07-13 03:04:44,821 (trainer:732) INFO: 42epoch:train:6701-6800batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=68.438, loss_att=52.085, acc=0.723, loss=56.991, backward_time=1.034, grad_norm=140.459, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.544e-05, train_time=2.754
+[gpub002:0/64] 2023-07-13 03:07:01,317 (trainer:732) INFO: 42epoch:train:6801-6900batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=64.735, loss_att=49.425, acc=0.714, loss=54.018, backward_time=1.027, grad_norm=114.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.543e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 03:09:19,001 (trainer:732) INFO: 42epoch:train:6901-7000batch: iter_time=0.004, forward_time=0.147, loss_ctc=64.172, loss_att=44.513, acc=0.739, loss=50.411, backward_time=1.035, grad_norm=108.401, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.542e-05, train_time=2.753
+[gpub002:0/64] 2023-07-13 03:11:34,913 (trainer:732) INFO: 42epoch:train:7001-7100batch: iter_time=0.002, forward_time=0.145, loss_ctc=70.024, loss_att=52.180, acc=0.717, loss=57.534, backward_time=1.028, grad_norm=133.620, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.542e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 03:14:01,730 (trainer:732) INFO: 42epoch:train:7101-7200batch: iter_time=1.283e-04, forward_time=0.217, loss_ctc=80.096, loss_att=59.082, acc=0.709, loss=65.386, backward_time=1.048, grad_norm=121.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.541e-05, train_time=2.936
+[gpub002:0/64] 2023-07-13 03:16:18,403 (trainer:732) INFO: 42epoch:train:7201-7300batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=72.737, loss_att=55.530, acc=0.720, loss=60.692, backward_time=1.031, grad_norm=126.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, train_time=2.733
+[gpub002:0/64] 2023-07-13 03:18:34,604 (trainer:732) INFO: 42epoch:train:7301-7400batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=76.887, loss_att=59.261, acc=0.713, loss=64.549, backward_time=1.028, grad_norm=153.123, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 03:20:50,723 (trainer:732) INFO: 42epoch:train:7401-7500batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=74.305, loss_att=53.789, acc=0.728, loss=59.944, backward_time=1.028, grad_norm=120.151, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.539e-05, train_time=2.722
+[gpub002:0/64] 2023-07-13 03:21:11,975 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 03:21:30,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-13 03:21:33,947 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 03:28:50,459 (trainer:732) INFO: 42epoch:train:7501-7600batch: iter_time=3.260, forward_time=0.146, loss_ctc=68.511, loss_att=47.602, acc=0.728, loss=53.875, backward_time=1.046, grad_norm=119.300, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=9.595
+[gpub002:0/64] 2023-07-13 03:31:08,080 (trainer:732) INFO: 42epoch:train:7601-7700batch: iter_time=1.319e-04, forward_time=0.145, loss_ctc=65.544, loss_att=50.965, acc=0.711, loss=55.339, backward_time=1.032, grad_norm=117.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=2.752
+[gpub002:0/64] 2023-07-13 03:33:24,531 (trainer:732) INFO: 42epoch:train:7701-7800batch: iter_time=1.504e-04, forward_time=0.147, loss_ctc=59.954, loss_att=44.149, acc=0.726, loss=48.890, backward_time=1.030, grad_norm=111.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.537e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 03:35:39,885 (trainer:732) INFO: 42epoch:train:7801-7900batch: iter_time=1.352e-04, forward_time=0.144, loss_ctc=71.306, loss_att=50.202, acc=0.729, loss=56.534, backward_time=1.025, grad_norm=132.190, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=2.707
+[gpub002:0/64] 2023-07-13 03:38:11,937 (trainer:732) INFO: 42epoch:train:7901-8000batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=81.280, loss_att=59.731, acc=0.695, loss=66.195, backward_time=1.044, grad_norm=116.322, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=3.041
+[gpub002:0/64] 2023-07-13 03:40:27,681 (trainer:732) INFO: 42epoch:train:8001-8100batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=73.657, loss_att=57.378, acc=0.722, loss=62.262, backward_time=1.028, grad_norm=119.137, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.535e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 03:42:43,340 (trainer:732) INFO: 42epoch:train:8101-8200batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=77.258, loss_att=59.529, acc=0.697, loss=64.848, backward_time=1.029, grad_norm=124.385, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.534e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 03:44:58,939 (trainer:732) INFO: 42epoch:train:8201-8300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=70.039, loss_att=50.529, acc=0.722, loss=56.382, backward_time=1.028, grad_norm=114.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 03:45:46,940 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-13 03:46:05,424 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-13 03:46:08,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 03:53:07,279 (trainer:732) INFO: 42epoch:train:8301-8400batch: iter_time=1.680, forward_time=0.177, loss_ctc=70.901, loss_att=52.528, acc=0.721, loss=58.040, backward_time=1.041, grad_norm=127.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=9.767
+[gpub002:0/64] 2023-07-13 03:55:25,674 (trainer:732) INFO: 42epoch:train:8401-8500batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=63.924, loss_att=47.108, acc=0.710, loss=52.153, backward_time=1.037, grad_norm=118.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.532e-05, train_time=2.768
+[gpub002:0/64] 2023-07-13 03:57:42,100 (trainer:732) INFO: 42epoch:train:8501-8600batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=63.131, loss_att=46.587, acc=0.723, loss=51.551, backward_time=1.029, grad_norm=92.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=2.728
+[gpub002:0/64] 2023-07-13 04:00:13,714 (trainer:732) INFO: 42epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=68.642, loss_att=48.513, acc=0.737, loss=54.552, backward_time=1.043, grad_norm=114.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=3.032
+[gpub002:0/64] 2023-07-13 04:02:29,944 (trainer:732) INFO: 42epoch:train:8701-8800batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=75.843, loss_att=56.720, acc=0.713, loss=62.457, backward_time=1.028, grad_norm=108.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.530e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 04:04:45,889 (trainer:732) INFO: 42epoch:train:8801-8900batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=77.727, loss_att=61.456, acc=0.711, loss=66.338, backward_time=1.029, grad_norm=118.766, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 04:07:01,510 (trainer:732) INFO: 42epoch:train:8901-9000batch: iter_time=1.245e-04, forward_time=0.144, loss_ctc=72.948, loss_att=53.187, acc=0.715, loss=59.115, backward_time=1.027, grad_norm=137.792, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 04:09:17,733 (trainer:732) INFO: 42epoch:train:9001-9100batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=75.533, loss_att=57.845, acc=0.699, loss=63.151, backward_time=1.031, grad_norm=119.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.528e-05, train_time=2.725
+[gpub002:0/64] 2023-07-13 04:10:50,524 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-13 04:11:08,648 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-13 04:11:12,055 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 04:16:46,199 (trainer:732) INFO: 42epoch:train:9101-9200batch: iter_time=1.637, forward_time=0.145, loss_ctc=71.833, loss_att=48.627, acc=0.732, loss=55.589, backward_time=1.042, grad_norm=104.157, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=8.969
+[gpub002:0/64] 2023-07-13 04:19:03,487 (trainer:732) INFO: 42epoch:train:9201-9300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.367, loss_att=51.468, acc=0.722, loss=56.238, backward_time=1.032, grad_norm=120.743, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=2.746
+[gpub002:0/64] 2023-07-13 04:21:20,258 (trainer:732) INFO: 42epoch:train:9301-9400batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.408, loss_att=50.219, acc=0.717, loss=54.776, backward_time=1.030, grad_norm=110.375, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.526e-05, train_time=2.735
+[gpub002:0/64] 2023-07-13 04:23:36,660 (trainer:732) INFO: 42epoch:train:9401-9500batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=63.769, loss_att=43.754, acc=0.739, loss=49.759, backward_time=1.025, grad_norm=107.213, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.728
+[gpub002:0/64] 2023-07-13 04:25:52,274 (trainer:732) INFO: 42epoch:train:9501-9600batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=70.289, loss_att=52.121, acc=0.718, loss=57.571, backward_time=1.025, grad_norm=122.835, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 04:28:08,506 (trainer:732) INFO: 42epoch:train:9601-9700batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=81.429, loss_att=62.500, acc=0.706, loss=68.178, backward_time=1.028, grad_norm=125.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.524e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 04:30:24,543 (trainer:732) INFO: 42epoch:train:9701-9800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=71.177, loss_att=53.266, acc=0.725, loss=58.639, backward_time=1.027, grad_norm=126.374, clip=100.000, loss_scale=2.401e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 04:32:40,550 (trainer:732) INFO: 42epoch:train:9801-9900batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=75.760, loss_att=57.853, acc=0.719, loss=63.225, backward_time=1.030, grad_norm=129.317, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.720
+[gpub002:0/64] 2023-07-13 04:34:56,397 (trainer:732) INFO: 42epoch:train:9901-10000batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=73.059, loss_att=52.842, acc=0.731, loss=58.907, backward_time=1.028, grad_norm=109.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.522e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 04:50:54,355 (trainer:338) INFO: 42epoch results: [train] iter_time=0.354, forward_time=0.196, loss_ctc=71.674, loss_att=53.129, acc=0.716, loss=58.693, backward_time=1.042, grad_norm=122.970, clip=100.000, loss_scale=1.303e+32, optim_step_time=0.185, optim0_lr0=5.556e-05, train_time=3.797, time=5 hours, 16 minutes and 39.53 seconds, total_count=390000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.090, cer_ctc=0.260, loss_att=36.484, acc=0.697, cer=0.342, wer=0.989, loss=38.766, time=6 minutes and 47.34 seconds, total_count=39974, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 56.53 seconds, total_count=0, gpu_max_cached_mem_GB=37.572
+[gpub002:0/64] 2023-07-13 04:51:13,901 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub002:0/64] 2023-07-13 04:51:13,945 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/31epoch.pth
+[gpub002:0/64] 2023-07-13 04:51:14,020 (trainer:272) INFO: 43/50epoch started. Estimated time to finish: 1 day, 17 hours and 23 minutes
+[gpub002:0/64] 2023-07-13 04:51:15,288 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-13 04:51:35,527 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 04:51:39,096 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 04:51:39,097 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-13 04:51:39,159 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 04:58:53,737 (trainer:732) INFO: 43epoch:train:1-100batch: iter_time=3.165, forward_time=0.177, loss_ctc=67.171, loss_att=52.136, acc=0.696, loss=56.646, backward_time=1.043, grad_norm=125.404, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.521e-05, train_time=9.181
+[gpub002:0/64] 2023-07-13 05:01:09,947 (trainer:732) INFO: 43epoch:train:101-200batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=74.299, loss_att=53.346, acc=0.702, loss=59.632, backward_time=1.031, grad_norm=143.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.521e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 05:03:26,486 (trainer:732) INFO: 43epoch:train:201-300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=72.274, loss_att=51.180, acc=0.712, loss=57.508, backward_time=1.030, grad_norm=127.012, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.520e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 05:05:44,653 (trainer:732) INFO: 43epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.149, loss_ctc=74.766, loss_att=54.066, acc=0.696, loss=60.276, backward_time=1.031, grad_norm=130.116, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.763
+[gpub002:0/64] 2023-07-13 05:08:01,530 (trainer:732) INFO: 43epoch:train:401-500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.776, loss_att=53.497, acc=0.706, loss=58.381, backward_time=1.029, grad_norm=113.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.737
+[gpub002:0/64] 2023-07-13 05:10:24,010 (trainer:732) INFO: 43epoch:train:501-600batch: iter_time=3.155e-04, forward_time=0.146, loss_ctc=78.761, loss_att=55.515, acc=0.699, loss=62.489, backward_time=1.032, grad_norm=125.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.518e-05, train_time=2.849
+[gpub002:0/64] 2023-07-13 05:12:54,234 (trainer:732) INFO: 43epoch:train:601-700batch: iter_time=1.062e-04, forward_time=0.144, loss_ctc=71.370, loss_att=43.750, acc=0.721, loss=52.036, backward_time=1.053, grad_norm=126.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.517e-05, train_time=3.004
+[gpub002:0/64] 2023-07-13 05:15:26,303 (trainer:732) INFO: 43epoch:train:701-800batch: iter_time=5.144e-04, forward_time=0.179, loss_ctc=66.772, loss_att=49.511, acc=0.712, loss=54.690, backward_time=1.041, grad_norm=121.595, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.517e-05, train_time=3.041
+[gpub002:0/64] 2023-07-13 05:16:19,570 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-13 05:16:37,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-13 05:16:40,684 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 05:23:58,859 (trainer:732) INFO: 43epoch:train:801-900batch: iter_time=1.808, forward_time=0.186, loss_ctc=68.942, loss_att=51.380, acc=0.705, loss=56.649, backward_time=1.041, grad_norm=122.084, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.516e-05, train_time=10.251
+[gpub002:0/64] 2023-07-13 05:26:15,903 (trainer:732) INFO: 43epoch:train:901-1000batch: iter_time=1.277e-04, forward_time=0.148, loss_ctc=67.154, loss_att=51.145, acc=0.718, loss=55.947, backward_time=1.034, grad_norm=100.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.741
+[gpub002:0/64] 2023-07-13 05:28:31,874 (trainer:732) INFO: 43epoch:train:1001-1100batch: iter_time=1.304e-04, forward_time=0.145, loss_ctc=74.706, loss_att=53.692, acc=0.714, loss=59.997, backward_time=1.028, grad_norm=134.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 05:30:50,785 (trainer:732) INFO: 43epoch:train:1101-1200batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=70.458, loss_att=49.738, acc=0.704, loss=55.954, backward_time=1.031, grad_norm=126.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.514e-05, train_time=2.778
+[gpub002:0/64] 2023-07-13 05:33:07,145 (trainer:732) INFO: 43epoch:train:1201-1300batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=76.640, loss_att=58.441, acc=0.723, loss=63.900, backward_time=1.030, grad_norm=120.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.513e-05, train_time=2.727
+[gpub002:0/64] 2023-07-13 05:35:42,951 (trainer:732) INFO: 43epoch:train:1301-1400batch: iter_time=0.003, forward_time=0.281, loss_ctc=63.792, loss_att=48.848, acc=0.704, loss=53.331, backward_time=1.064, grad_norm=120.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.193, optim0_lr0=5.513e-05, train_time=3.115
+[gpub002:0/64] 2023-07-13 05:37:59,663 (trainer:732) INFO: 43epoch:train:1401-1500batch: iter_time=1.387e-04, forward_time=0.147, loss_ctc=73.107, loss_att=46.764, acc=0.723, loss=54.667, backward_time=1.030, grad_norm=135.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.512e-05, train_time=2.735
+[gpub002:0/64] 2023-07-13 05:40:17,904 (trainer:732) INFO: 43epoch:train:1501-1600batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=66.824, loss_att=47.833, acc=0.731, loss=53.530, backward_time=1.030, grad_norm=106.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=2.765
+[gpub002:0/64] 2023-07-13 05:41:55,175 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-13 05:42:13,121 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-13 05:42:16,552 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 05:46:42,778 (trainer:732) INFO: 43epoch:train:1601-1700batch: iter_time=2.035, forward_time=0.145, loss_ctc=64.829, loss_att=45.468, acc=0.725, loss=51.276, backward_time=1.045, grad_norm=109.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=7.697
+[gpub002:0/64] 2023-07-13 05:49:04,557 (trainer:732) INFO: 43epoch:train:1701-1800batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=68.037, loss_att=51.317, acc=0.717, loss=56.333, backward_time=1.039, grad_norm=110.998, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.510e-05, train_time=2.835
+[gpub002:0/64] 2023-07-13 05:51:20,869 (trainer:732) INFO: 43epoch:train:1801-1900batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=78.940, loss_att=57.649, acc=0.709, loss=64.036, backward_time=1.028, grad_norm=127.683, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.726
+[gpub002:0/64] 2023-07-13 05:53:36,717 (trainer:732) INFO: 43epoch:train:1901-2000batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=62.792, loss_att=44.461, acc=0.716, loss=49.960, backward_time=1.025, grad_norm=98.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 05:55:52,706 (trainer:732) INFO: 43epoch:train:2001-2100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=81.040, loss_att=61.512, acc=0.713, loss=67.371, backward_time=1.029, grad_norm=130.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.508e-05, train_time=2.720
+[gpub002:0/64] 2023-07-13 05:58:11,248 (trainer:732) INFO: 43epoch:train:2101-2200batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.820, loss_att=50.416, acc=0.711, loss=54.738, backward_time=1.030, grad_norm=111.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.771
+[gpub002:0/64] 2023-07-13 06:00:26,904 (trainer:732) INFO: 43epoch:train:2201-2300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.248, loss_att=47.166, acc=0.714, loss=53.791, backward_time=1.026, grad_norm=117.367, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 06:02:46,061 (trainer:732) INFO: 43epoch:train:2301-2400batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=65.678, loss_att=43.020, acc=0.730, loss=49.818, backward_time=1.039, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.506e-05, train_time=2.783
+[gpub002:0/64] 2023-07-13 06:05:02,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-13 06:05:21,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-13 06:05:24,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 06:11:05,880 (trainer:732) INFO: 43epoch:train:2401-2500batch: iter_time=1.277, forward_time=0.147, loss_ctc=70.028, loss_att=50.589, acc=0.734, loss=56.420, backward_time=1.041, grad_norm=117.428, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=9.996
+[gpub002:0/64] 2023-07-13 06:13:45,256 (trainer:732) INFO: 43epoch:train:2501-2600batch: iter_time=1.182e-04, forward_time=0.146, loss_ctc=65.700, loss_att=49.932, acc=0.707, loss=54.662, backward_time=1.045, grad_norm=108.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=3.187
+[gpub002:0/64] 2023-07-13 06:16:01,175 (trainer:732) INFO: 43epoch:train:2601-2700batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=69.006, loss_att=51.406, acc=0.708, loss=56.686, backward_time=1.029, grad_norm=117.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.504e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 06:18:16,928 (trainer:732) INFO: 43epoch:train:2701-2800batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=68.756, loss_att=49.034, acc=0.720, loss=54.951, backward_time=1.026, grad_norm=113.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 06:20:32,341 (trainer:732) INFO: 43epoch:train:2801-2900batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=74.163, loss_att=54.309, acc=0.702, loss=60.265, backward_time=1.024, grad_norm=120.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.708
+[gpub002:0/64] 2023-07-13 06:22:47,910 (trainer:732) INFO: 43epoch:train:2901-3000batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=68.902, loss_att=52.706, acc=0.711, loss=57.565, backward_time=1.027, grad_norm=117.962, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.502e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 06:25:04,075 (trainer:732) INFO: 43epoch:train:3001-3100batch: iter_time=1.264e-04, forward_time=0.148, loss_ctc=72.330, loss_att=53.634, acc=0.706, loss=59.243, backward_time=1.029, grad_norm=126.462, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 06:27:19,562 (trainer:732) INFO: 43epoch:train:3101-3200batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=64.870, loss_att=42.763, acc=0.727, loss=49.396, backward_time=1.026, grad_norm=109.345, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 06:29:35,085 (trainer:732) INFO: 43epoch:train:3201-3300batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=66.320, loss_att=48.097, acc=0.724, loss=53.564, backward_time=1.026, grad_norm=116.268, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.500e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 06:30:19,781 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-13 06:30:37,996 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-13 06:30:41,425 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 06:36:35,849 (trainer:732) INFO: 43epoch:train:3301-3400batch: iter_time=1.288, forward_time=0.145, loss_ctc=70.209, loss_att=56.960, acc=0.702, loss=60.934, backward_time=1.040, grad_norm=127.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=8.415
+[gpub002:0/64] 2023-07-13 06:38:52,517 (trainer:732) INFO: 43epoch:train:3401-3500batch: iter_time=1.077e-04, forward_time=0.147, loss_ctc=70.143, loss_att=50.603, acc=0.720, loss=56.465, backward_time=1.032, grad_norm=141.257, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=2.733
+[gpub002:0/64] 2023-07-13 06:41:08,598 (trainer:732) INFO: 43epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.845, loss_att=47.427, acc=0.720, loss=52.953, backward_time=1.031, grad_norm=125.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.498e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 06:43:24,520 (trainer:732) INFO: 43epoch:train:3601-3700batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=77.247, loss_att=55.054, acc=0.710, loss=61.712, backward_time=1.028, grad_norm=118.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 06:45:40,581 (trainer:732) INFO: 43epoch:train:3701-3800batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=65.852, loss_att=50.169, acc=0.722, loss=54.874, backward_time=1.031, grad_norm=106.865, clip=100.000, loss_scale=4.803e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 06:47:59,361 (trainer:732) INFO: 43epoch:train:3801-3900batch: iter_time=1.074e-04, forward_time=0.148, loss_ctc=72.644, loss_att=54.032, acc=0.710, loss=59.615, backward_time=1.031, grad_norm=136.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.496e-05, train_time=2.775
+[gpub002:0/64] 2023-07-13 06:50:15,237 (trainer:732) INFO: 43epoch:train:3901-4000batch: iter_time=1.063e-04, forward_time=0.147, loss_ctc=64.068, loss_att=40.961, acc=0.734, loss=47.893, backward_time=1.029, grad_norm=130.607, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 06:52:31,943 (trainer:732) INFO: 43epoch:train:4001-4100batch: iter_time=1.034e-04, forward_time=0.147, loss_ctc=65.865, loss_att=48.470, acc=0.723, loss=53.688, backward_time=1.030, grad_norm=141.896, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.734
+[gpub002:0/64] 2023-07-13 06:54:15,645 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub002:0/64] 2023-07-13 06:54:33,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-13 06:54:37,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 06:58:54,596 (trainer:732) INFO: 43epoch:train:4101-4200batch: iter_time=1.344, forward_time=0.226, loss_ctc=68.341, loss_att=49.394, acc=0.720, loss=55.078, backward_time=1.069, grad_norm=135.878, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.494e-05, train_time=7.653
+[gpub002:0/64] 2023-07-13 07:01:12,004 (trainer:732) INFO: 43epoch:train:4201-4300batch: iter_time=1.012e-04, forward_time=0.148, loss_ctc=68.518, loss_att=52.143, acc=0.714, loss=57.055, backward_time=1.031, grad_norm=164.714, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.748
+[gpub002:0/64] 2023-07-13 07:03:28,068 (trainer:732) INFO: 43epoch:train:4301-4400batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=78.283, loss_att=56.097, acc=0.717, loss=62.753, backward_time=1.031, grad_norm=131.835, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 07:05:44,046 (trainer:732) INFO: 43epoch:train:4401-4500batch: iter_time=1.003e-04, forward_time=0.146, loss_ctc=61.875, loss_att=43.498, acc=0.722, loss=49.011, backward_time=1.030, grad_norm=101.796, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.492e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 07:08:00,105 (trainer:732) INFO: 43epoch:train:4501-4600batch: iter_time=9.596e-05, forward_time=0.145, loss_ctc=80.642, loss_att=61.038, acc=0.717, loss=66.919, backward_time=1.029, grad_norm=116.023, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 07:10:39,483 (trainer:732) INFO: 43epoch:train:4601-4700batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=62.748, loss_att=49.774, acc=0.715, loss=53.666, backward_time=1.071, grad_norm=110.121, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, train_time=3.187
+[gpub002:0/64] 2023-07-13 07:12:58,455 (trainer:732) INFO: 43epoch:train:4701-4800batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=69.387, loss_att=46.901, acc=0.713, loss=53.646, backward_time=1.043, grad_norm=122.959, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.490e-05, train_time=2.779
+[gpub002:0/64] 2023-07-13 07:15:14,699 (trainer:732) INFO: 43epoch:train:4801-4900batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=63.195, loss_att=42.368, acc=0.732, loss=48.616, backward_time=1.029, grad_norm=110.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=2.725
+[gpub002:0/64] 2023-07-13 07:16:46,779 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 07:17:31,636 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub002:0/64] 2023-07-13 07:17:49,916 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-13 07:17:53,368 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 07:21:12,580 (trainer:732) INFO: 43epoch:train:4901-5000batch: iter_time=1.363, forward_time=0.145, loss_ctc=69.506, loss_att=50.192, acc=0.737, loss=55.987, backward_time=1.033, grad_norm=111.488, clip=100.000, loss_scale=5.431e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=7.157
+[gpub002:0/64] 2023-07-13 07:23:30,148 (trainer:732) INFO: 43epoch:train:5001-5100batch: iter_time=1.400e-04, forward_time=0.148, loss_ctc=65.881, loss_att=50.693, acc=0.708, loss=55.250, backward_time=1.037, grad_norm=122.810, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.488e-05, train_time=2.751
+[gpub002:0/64] 2023-07-13 07:25:46,716 (trainer:732) INFO: 43epoch:train:5101-5200batch: iter_time=1.426e-04, forward_time=0.147, loss_ctc=67.325, loss_att=50.408, acc=0.713, loss=55.483, backward_time=1.029, grad_norm=122.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 07:28:02,507 (trainer:732) INFO: 43epoch:train:5201-5300batch: iter_time=1.599e-04, forward_time=0.146, loss_ctc=69.909, loss_att=49.226, acc=0.722, loss=55.431, backward_time=1.029, grad_norm=113.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 07:28:23,967 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 07:30:18,027 (trainer:732) INFO: 43epoch:train:5301-5400batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=73.830, loss_att=53.727, acc=0.707, loss=59.758, backward_time=1.028, grad_norm=126.012, clip=100.000, loss_scale=1.854e+32, optim_step_time=0.182, optim0_lr0=5.486e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 07:32:33,556 (trainer:732) INFO: 43epoch:train:5401-5500batch: iter_time=1.358e-04, forward_time=0.145, loss_ctc=69.457, loss_att=53.006, acc=0.711, loss=57.941, backward_time=1.027, grad_norm=118.602, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 07:34:49,617 (trainer:732) INFO: 43epoch:train:5501-5600batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=70.674, loss_att=52.567, acc=0.708, loss=57.999, backward_time=1.029, grad_norm=124.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 07:37:05,125 (trainer:732) INFO: 43epoch:train:5601-5700batch: iter_time=1.538e-04, forward_time=0.146, loss_ctc=64.632, loss_att=42.627, acc=0.728, loss=49.228, backward_time=1.026, grad_norm=117.128, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.484e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 07:39:20,847 (trainer:732) INFO: 43epoch:train:5701-5800batch: iter_time=1.408e-04, forward_time=0.147, loss_ctc=65.046, loss_att=47.284, acc=0.726, loss=52.613, backward_time=1.028, grad_norm=118.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 07:40:20,191 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub002:0/64] 2023-07-13 07:40:38,345 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-13 07:40:41,843 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 07:46:21,327 (trainer:732) INFO: 43epoch:train:5801-5900batch: iter_time=2.635, forward_time=0.147, loss_ctc=69.559, loss_att=55.942, acc=0.705, loss=60.027, backward_time=1.049, grad_norm=111.034, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=8.409
+[gpub002:0/64] 2023-07-13 07:48:37,870 (trainer:732) INFO: 43epoch:train:5901-6000batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=71.663, loss_att=52.278, acc=0.708, loss=58.094, backward_time=1.029, grad_norm=167.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.482e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 07:50:53,784 (trainer:732) INFO: 43epoch:train:6001-6100batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=65.614, loss_att=46.575, acc=0.717, loss=52.287, backward_time=1.029, grad_norm=119.203, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=2.718
+[gpub002:0/64] 2023-07-13 07:53:27,460 (trainer:732) INFO: 43epoch:train:6101-6200batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=75.060, loss_att=54.210, acc=0.707, loss=60.465, backward_time=1.046, grad_norm=113.546, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=3.073
+[gpub002:0/64] 2023-07-13 07:55:51,083 (trainer:732) INFO: 43epoch:train:6201-6300batch: iter_time=3.027e-04, forward_time=0.179, loss_ctc=66.883, loss_att=50.860, acc=0.711, loss=55.667, backward_time=1.034, grad_norm=113.637, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.480e-05, train_time=2.872
+[gpub002:0/64] 2023-07-13 07:58:09,272 (trainer:732) INFO: 43epoch:train:6301-6400batch: iter_time=1.044e-04, forward_time=0.163, loss_ctc=73.490, loss_att=54.979, acc=0.704, loss=60.532, backward_time=1.030, grad_norm=134.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.479e-05, train_time=2.764
+[gpub002:0/64] 2023-07-13 08:00:24,736 (trainer:732) INFO: 43epoch:train:6401-6500batch: iter_time=1.026e-04, forward_time=0.144, loss_ctc=65.033, loss_att=41.165, acc=0.730, loss=48.326, backward_time=1.025, grad_norm=125.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.479e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 08:02:40,579 (trainer:732) INFO: 43epoch:train:6501-6600batch: iter_time=1.147e-04, forward_time=0.144, loss_ctc=67.103, loss_att=47.658, acc=0.719, loss=53.491, backward_time=1.028, grad_norm=121.695, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.478e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 08:04:11,900 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub002:0/64] 2023-07-13 08:04:30,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-13 08:04:33,395 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 08:09:38,546 (trainer:732) INFO: 43epoch:train:6601-6700batch: iter_time=1.336, forward_time=0.186, loss_ctc=69.082, loss_att=51.109, acc=0.714, loss=56.501, backward_time=1.038, grad_norm=108.294, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.477e-05, train_time=8.359
+[gpub002:0/64] 2023-07-13 08:10:34,386 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 08:12:01,808 (trainer:732) INFO: 43epoch:train:6701-6800batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=67.704, loss_att=51.773, acc=0.711, loss=56.552, backward_time=1.037, grad_norm=130.635, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.477e-05, train_time=2.865
+[gpub002:0/64] 2023-07-13 08:14:23,253 (trainer:732) INFO: 43epoch:train:6801-6900batch: iter_time=1.293e-04, forward_time=0.145, loss_ctc=77.182, loss_att=56.548, acc=0.705, loss=62.738, backward_time=1.042, grad_norm=118.840, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.829
+[gpub002:0/64] 2023-07-13 08:16:48,767 (trainer:732) INFO: 43epoch:train:6901-7000batch: iter_time=1.451e-04, forward_time=0.145, loss_ctc=60.933, loss_att=43.482, acc=0.718, loss=48.717, backward_time=1.039, grad_norm=142.200, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.910
+[gpub002:0/64] 2023-07-13 08:19:18,077 (trainer:732) INFO: 43epoch:train:7001-7100batch: iter_time=1.466e-04, forward_time=0.146, loss_ctc=80.225, loss_att=60.938, acc=0.714, loss=66.724, backward_time=1.043, grad_norm=128.693, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.475e-05, train_time=2.986
+[gpub002:0/64] 2023-07-13 08:21:41,636 (trainer:732) INFO: 43epoch:train:7101-7200batch: iter_time=1.517e-04, forward_time=0.146, loss_ctc=62.221, loss_att=49.382, acc=0.708, loss=53.234, backward_time=1.042, grad_norm=137.390, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.474e-05, train_time=2.871
+[gpub002:0/64] 2023-07-13 08:24:09,625 (trainer:732) INFO: 43epoch:train:7201-7300batch: iter_time=1.485e-04, forward_time=0.145, loss_ctc=69.377, loss_att=47.292, acc=0.709, loss=53.917, backward_time=1.058, grad_norm=123.920, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.474e-05, train_time=2.960
+[gpub002:0/64] 2023-07-13 08:26:25,316 (trainer:732) INFO: 43epoch:train:7301-7400batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=62.734, loss_att=42.130, acc=0.731, loss=48.311, backward_time=1.028, grad_norm=112.463, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.473e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 08:28:42,136 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 08:29:00,185 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-13 08:29:03,842 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 08:31:55,060 (trainer:732) INFO: 43epoch:train:7401-7500batch: iter_time=1.410, forward_time=0.173, loss_ctc=70.908, loss_att=52.086, acc=0.730, loss=57.732, backward_time=1.031, grad_norm=103.567, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=6.595
+[gpub002:0/64] 2023-07-13 08:34:14,214 (trainer:732) INFO: 43epoch:train:7501-7600batch: iter_time=1.180e-04, forward_time=0.147, loss_ctc=66.889, loss_att=51.014, acc=0.718, loss=55.777, backward_time=1.039, grad_norm=122.067, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=2.783
+[gpub002:0/64] 2023-07-13 08:36:31,213 (trainer:732) INFO: 43epoch:train:7601-7700batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=76.393, loss_att=55.664, acc=0.714, loss=61.882, backward_time=1.029, grad_norm=138.860, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.471e-05, train_time=2.740
+[gpub002:0/64] 2023-07-13 08:38:47,398 (trainer:732) INFO: 43epoch:train:7701-7800batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=62.356, loss_att=44.020, acc=0.725, loss=49.521, backward_time=1.030, grad_norm=118.319, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.470e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 08:41:05,554 (trainer:732) INFO: 43epoch:train:7801-7900batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=79.362, loss_att=60.799, acc=0.719, loss=66.368, backward_time=1.031, grad_norm=129.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.470e-05, train_time=2.763
+[gpub002:0/64] 2023-07-13 08:43:32,372 (trainer:732) INFO: 43epoch:train:7901-8000batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=62.046, loss_att=49.851, acc=0.708, loss=53.509, backward_time=1.046, grad_norm=124.743, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.469e-05, train_time=2.936
+[gpub002:0/64] 2023-07-13 08:45:48,570 (trainer:732) INFO: 43epoch:train:8001-8100batch: iter_time=1.225e-04, forward_time=0.144, loss_ctc=67.225, loss_att=44.208, acc=0.725, loss=51.113, backward_time=1.027, grad_norm=118.012, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 08:48:07,719 (trainer:732) INFO: 43epoch:train:8101-8200batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=64.666, loss_att=43.302, acc=0.727, loss=49.711, backward_time=1.038, grad_norm=118.883, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.783
+[gpub002:0/64] 2023-07-13 08:50:32,893 (trainer:732) INFO: 43epoch:train:8201-8300batch: iter_time=1.252e-04, forward_time=0.145, loss_ctc=70.428, loss_att=52.218, acc=0.734, loss=57.681, backward_time=1.049, grad_norm=109.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.467e-05, train_time=2.903
+[gpub002:0/64] 2023-07-13 08:51:36,412 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-13 08:51:54,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-13 08:51:58,097 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 08:58:16,119 (trainer:732) INFO: 43epoch:train:8301-8400batch: iter_time=3.044, forward_time=0.205, loss_ctc=68.169, loss_att=54.539, acc=0.704, loss=58.628, backward_time=1.054, grad_norm=109.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.466e-05, train_time=9.264
+[gpub002:0/64] 2023-07-13 09:00:33,779 (trainer:732) INFO: 43epoch:train:8401-8500batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=71.006, loss_att=51.275, acc=0.715, loss=57.194, backward_time=1.029, grad_norm=111.833, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.466e-05, train_time=2.753
+[gpub002:0/64] 2023-07-13 09:02:50,313 (trainer:732) INFO: 43epoch:train:8501-8600batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=65.216, loss_att=46.304, acc=0.720, loss=51.978, backward_time=1.031, grad_norm=114.151, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.465e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 09:05:11,317 (trainer:732) INFO: 43epoch:train:8601-8700batch: iter_time=1.264e-04, forward_time=0.144, loss_ctc=75.105, loss_att=54.233, acc=0.710, loss=60.494, backward_time=1.034, grad_norm=141.900, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.820
+[gpub002:0/64] 2023-07-13 09:07:26,944 (trainer:732) INFO: 43epoch:train:8701-8800batch: iter_time=1.226e-04, forward_time=0.144, loss_ctc=65.886, loss_att=50.674, acc=0.710, loss=55.238, backward_time=1.027, grad_norm=114.949, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 09:09:54,294 (trainer:732) INFO: 43epoch:train:8801-8900batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=72.328, loss_att=54.323, acc=0.708, loss=59.725, backward_time=1.040, grad_norm=129.091, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.463e-05, train_time=2.947
+[gpub002:0/64] 2023-07-13 09:12:09,955 (trainer:732) INFO: 43epoch:train:8901-9000batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=62.790, loss_att=40.413, acc=0.733, loss=47.126, backward_time=1.026, grad_norm=123.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 09:14:30,267 (trainer:732) INFO: 43epoch:train:9001-9100batch: iter_time=1.184e-04, forward_time=0.144, loss_ctc=65.546, loss_att=47.434, acc=0.723, loss=52.868, backward_time=1.030, grad_norm=126.202, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.806
+[gpub002:0/64] 2023-07-13 09:16:03,133 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-13 09:16:21,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-13 09:16:24,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 09:19:55,353 (trainer:732) INFO: 43epoch:train:9101-9200batch: iter_time=1.299, forward_time=0.166, loss_ctc=68.022, loss_att=50.137, acc=0.719, loss=55.502, backward_time=1.039, grad_norm=114.933, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.461e-05, train_time=6.502
+[gpub002:0/64] 2023-07-13 09:22:18,425 (trainer:732) INFO: 43epoch:train:9201-9300batch: iter_time=0.002, forward_time=0.182, loss_ctc=67.757, loss_att=51.479, acc=0.719, loss=56.362, backward_time=1.044, grad_norm=114.798, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=5.460e-05, train_time=2.861
+[gpub002:0/64] 2023-07-13 09:24:35,968 (trainer:732) INFO: 43epoch:train:9301-9400batch: iter_time=1.123e-04, forward_time=0.148, loss_ctc=75.353, loss_att=56.327, acc=0.716, loss=62.035, backward_time=1.030, grad_norm=143.157, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.460e-05, train_time=2.751
+[gpub002:0/64] 2023-07-13 09:26:52,066 (trainer:732) INFO: 43epoch:train:9401-9500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=63.167, loss_att=43.706, acc=0.722, loss=49.544, backward_time=1.028, grad_norm=124.127, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.459e-05, train_time=2.722
+[gpub002:0/64] 2023-07-13 09:29:08,787 (trainer:732) INFO: 43epoch:train:9501-9600batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=78.082, loss_att=59.968, acc=0.719, loss=65.402, backward_time=1.029, grad_norm=139.813, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.459e-05, train_time=2.734
+[gpub002:0/64] 2023-07-13 09:31:24,647 (trainer:732) INFO: 43epoch:train:9601-9700batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=62.075, loss_att=49.317, acc=0.717, loss=53.145, backward_time=1.028, grad_norm=129.904, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.458e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 09:33:40,464 (trainer:732) INFO: 43epoch:train:9701-9800batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=67.821, loss_att=46.388, acc=0.718, loss=52.818, backward_time=1.029, grad_norm=116.293, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 09:35:56,258 (trainer:732) INFO: 43epoch:train:9801-9900batch: iter_time=1.285e-04, forward_time=0.146, loss_ctc=62.427, loss_att=40.698, acc=0.739, loss=47.217, backward_time=1.028, grad_norm=116.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 09:38:11,996 (trainer:732) INFO: 43epoch:train:9901-10000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=69.128, loss_att=50.402, acc=0.737, loss=56.020, backward_time=1.028, grad_norm=113.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.456e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 09:51:01,601 (trainer:338) INFO: 43epoch results: [train] iter_time=0.220, forward_time=0.151, loss_ctc=69.174, loss_att=50.360, acc=0.716, loss=56.005, backward_time=1.034, grad_norm=122.264, clip=100.000, loss_scale=2.614e+32, optim_step_time=0.182, optim0_lr0=5.488e-05, train_time=3.443, time=4 hours, 47 minutes and 9.28 seconds, total_count=400000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=44.035, cer_ctc=0.257, loss_att=36.329, acc=0.704, cer=0.323, wer=0.986, loss=38.641, time=6 minutes and 40.54 seconds, total_count=40986, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 57.64 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-13 09:51:20,539 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub002:0/64] 2023-07-13 09:51:20,601 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/23epoch.pth
+[gpub002:0/64] 2023-07-13 09:51:20,630 (trainer:272) INFO: 44/50epoch started. Estimated time to finish: 1 day, 11 hours and 55 minutes
+[gpub002:0/64] 2023-07-13 09:51:21,201 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-13 09:51:40,196 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 09:51:43,691 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 09:59:49,160 (trainer:732) INFO: 44epoch:train:1-100batch: iter_time=3.652, forward_time=0.172, loss_ctc=71.282, loss_att=54.434, acc=0.717, loss=59.488, backward_time=1.044, grad_norm=125.239, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.455e-05, train_time=10.160 +[gpub002:0/64] 2023-07-13 10:02:06,233 (trainer:732) INFO: 44epoch:train:101-200batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=74.775, loss_att=56.050, acc=0.700, loss=61.668, backward_time=1.031, grad_norm=122.349, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.455e-05, train_time=2.742 +[gpub002:0/64] 2023-07-13 10:04:22,775 (trainer:732) INFO: 44epoch:train:201-300batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=87.979, loss_att=66.421, acc=0.708, loss=72.889, backward_time=1.032, grad_norm=117.444, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.454e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 10:06:38,707 (trainer:732) INFO: 44epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=70.492, loss_att=49.371, acc=0.696, loss=55.707, backward_time=1.027, grad_norm=120.741, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 10:08:58,696 (trainer:732) INFO: 44epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=76.164, loss_att=60.005, acc=0.695, loss=64.852, backward_time=1.030, grad_norm=120.304, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 10:11:18,222 (trainer:732) INFO: 44epoch:train:501-600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=86.092, loss_att=56.917, acc=0.713, loss=65.670, backward_time=1.031, grad_norm=137.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.452e-05, train_time=2.790 +[gpub002:0/64] 2023-07-13 10:13:52,188 (trainer:732) INFO: 44epoch:train:601-700batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=72.733, loss_att=54.095, acc=0.720, loss=59.686, backward_time=1.042, grad_norm=123.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.451e-05, train_time=3.079 +[gpub002:0/64] 2023-07-13 10:16:22,649 (trainer:732) INFO: 44epoch:train:701-800batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=76.359, loss_att=63.914, acc=0.695, loss=67.647, backward_time=1.055, grad_norm=120.020, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.182, optim0_lr0=5.451e-05, 
train_time=3.009 +[gpub002:0/64] 2023-07-13 10:17:24,426 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 10:17:42,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 10:17:45,730 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:24:20,092 (trainer:732) INFO: 44epoch:train:801-900batch: iter_time=3.299, forward_time=0.177, loss_ctc=75.207, loss_att=54.676, acc=0.718, loss=60.835, backward_time=1.047, grad_norm=125.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.450e-05, train_time=9.548 +[gpub002:0/64] 2023-07-13 10:26:36,519 (trainer:732) INFO: 44epoch:train:901-1000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=74.822, loss_att=57.463, acc=0.693, loss=62.670, backward_time=1.028, grad_norm=130.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 10:28:53,173 (trainer:732) INFO: 44epoch:train:1001-1100batch: iter_time=1.127e-04, forward_time=0.147, loss_ctc=83.374, loss_att=63.558, acc=0.698, loss=69.502, backward_time=1.030, grad_norm=144.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 10:31:09,555 (trainer:732) INFO: 44epoch:train:1101-1200batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=77.286, loss_att=52.646, acc=0.715, loss=60.038, backward_time=1.031, grad_norm=129.265, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.448e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 10:33:25,148 (trainer:732) INFO: 44epoch:train:1201-1300batch: iter_time=1.248e-04, forward_time=0.143, loss_ctc=71.158, loss_att=53.092, acc=0.697, loss=58.512, backward_time=1.026, grad_norm=97.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:35:40,885 (trainer:732) INFO: 44epoch:train:1301-1400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=79.055, loss_att=55.984, acc=0.699, loss=62.905, backward_time=1.028, grad_norm=113.589, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:37:56,583 (trainer:732) INFO: 44epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=76.489, loss_att=55.112, acc=0.717, loss=61.525, backward_time=1.027, grad_norm=124.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.446e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 10:40:12,676 (trainer:732) INFO: 44epoch:train:1501-1600batch: iter_time=1.402e-04, forward_time=0.145, loss_ctc=71.799, loss_att=59.313, acc=0.689, loss=63.059, 
backward_time=1.031, grad_norm=120.829, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.446e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 10:41:44,379 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 10:42:02,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 10:42:06,283 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:45:52,981 (trainer:732) INFO: 44epoch:train:1601-1700batch: iter_time=1.287, forward_time=0.144, loss_ctc=80.146, loss_att=60.139, acc=0.710, loss=66.141, backward_time=1.038, grad_norm=125.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.445e-05, train_time=6.806 +[gpub002:0/64] 2023-07-13 10:48:09,910 (trainer:732) INFO: 44epoch:train:1701-1800batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.303, loss_att=54.003, acc=0.711, loss=58.893, backward_time=1.033, grad_norm=114.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 10:50:25,669 (trainer:732) INFO: 44epoch:train:1801-1900batch: iter_time=1.468e-04, forward_time=0.146, loss_ctc=76.075, loss_att=61.114, acc=0.693, loss=65.602, backward_time=1.028, grad_norm=138.441, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:52:41,672 (trainer:732) INFO: 44epoch:train:1901-2000batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=83.849, loss_att=56.957, acc=0.718, loss=65.025, backward_time=1.029, grad_norm=117.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.443e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 10:54:57,290 (trainer:732) INFO: 44epoch:train:2001-2100batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=67.877, loss_att=48.882, acc=0.702, loss=54.581, backward_time=1.026, grad_norm=113.885, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:57:13,426 (trainer:732) INFO: 44epoch:train:2101-2200batch: iter_time=9.840e-05, forward_time=0.144, loss_ctc=81.071, loss_att=59.103, acc=0.698, loss=65.693, backward_time=1.029, grad_norm=131.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 10:59:32,038 (trainer:732) INFO: 44epoch:train:2201-2300batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=74.573, loss_att=53.223, acc=0.709, loss=59.628, backward_time=1.030, grad_norm=119.358, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.441e-05, train_time=2.772 +[gpub002:0/64] 2023-07-13 11:01:50,330 (trainer:732) INFO: 
44epoch:train:2301-2400batch: iter_time=1.018e-04, forward_time=0.145, loss_ctc=75.625, loss_att=60.504, acc=0.695, loss=65.040, backward_time=1.041, grad_norm=128.773, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 11:04:07,084 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 11:04:25,422 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 11:04:28,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:08:22,215 (trainer:732) INFO: 44epoch:train:2401-2500batch: iter_time=1.295, forward_time=0.144, loss_ctc=78.132, loss_att=62.868, acc=0.704, loss=67.447, backward_time=1.036, grad_norm=137.248, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=7.837 +[gpub002:0/64] 2023-07-13 11:10:40,796 (trainer:732) INFO: 44epoch:train:2501-2600batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=67.608, loss_att=49.652, acc=0.723, loss=55.039, backward_time=1.036, grad_norm=147.316, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.439e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 11:12:56,584 (trainer:732) INFO: 44epoch:train:2601-2700batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=73.380, loss_att=58.680, acc=0.696, loss=63.090, backward_time=1.029, grad_norm=223.909, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 11:15:12,427 (trainer:732) INFO: 44epoch:train:2701-2800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=86.240, loss_att=61.095, acc=0.709, loss=68.639, backward_time=1.027, grad_norm=160.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 11:17:27,987 (trainer:732) INFO: 44epoch:train:2801-2900batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.542, loss_att=47.662, acc=0.704, loss=53.926, backward_time=1.026, grad_norm=161.052, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 11:19:43,703 (trainer:732) INFO: 44epoch:train:2901-3000batch: iter_time=1.479e-04, forward_time=0.145, loss_ctc=74.838, loss_att=58.199, acc=0.693, loss=63.191, backward_time=1.028, grad_norm=127.448, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:21:59,433 (trainer:732) INFO: 44epoch:train:3001-3100batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=85.103, loss_att=56.031, acc=0.712, loss=64.753, backward_time=1.027, grad_norm=130.375, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.436e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:24:15,402 (trainer:732) INFO: 44epoch:train:3101-3200batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.848, loss_att=54.267, acc=0.711, loss=59.241, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:26:31,345 (trainer:732) INFO: 44epoch:train:3201-3300batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=78.308, loss_att=65.274, acc=0.688, loss=69.184, backward_time=1.030, grad_norm=163.070, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:27:16,348 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 11:27:34,316 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 11:27:37,771 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:34:38,710 (trainer:732) INFO: 44epoch:train:3301-3400batch: iter_time=1.305, forward_time=0.144, loss_ctc=71.977, loss_att=52.228, acc=0.724, loss=58.153, backward_time=1.042, grad_norm=140.230, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.434e-05, train_time=9.747 +[gpub002:0/64] 2023-07-13 11:36:55,659 (trainer:732) INFO: 44epoch:train:3401-3500batch: iter_time=1.283e-04, forward_time=0.145, loss_ctc=73.853, loss_att=55.371, acc=0.710, loss=60.915, backward_time=1.029, grad_norm=146.633, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.433e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 11:39:16,602 (trainer:732) INFO: 44epoch:train:3501-3600batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=82.166, loss_att=61.239, acc=0.713, loss=67.517, backward_time=1.055, grad_norm=156.465, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.433e-05, train_time=2.819 +[gpub002:0/64] 2023-07-13 11:41:35,797 (trainer:732) INFO: 44epoch:train:3601-3700batch: iter_time=1.001e-04, forward_time=0.146, loss_ctc=76.586, loss_att=51.918, acc=0.716, loss=59.318, backward_time=1.044, grad_norm=124.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.432e-05, train_time=2.784 +[gpub002:0/64] 2023-07-13 11:43:51,423 (trainer:732) INFO: 44epoch:train:3701-3800batch: iter_time=9.900e-05, forward_time=0.145, loss_ctc=69.296, loss_att=53.491, acc=0.701, loss=58.232, backward_time=1.027, grad_norm=124.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 11:46:07,819 (trainer:732) INFO: 44epoch:train:3801-3900batch: iter_time=1.039e-04, 
forward_time=0.147, loss_ctc=78.553, loss_att=56.210, acc=0.707, loss=62.913, backward_time=1.031, grad_norm=124.973, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 11:48:28,053 (trainer:732) INFO: 44epoch:train:3901-4000batch: iter_time=1.387e-04, forward_time=0.148, loss_ctc=74.950, loss_att=55.969, acc=0.726, loss=61.664, backward_time=1.037, grad_norm=137.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.430e-05, train_time=2.804 +[gpub002:0/64] 2023-07-13 11:50:48,088 (trainer:732) INFO: 44epoch:train:4001-4100batch: iter_time=1.290e-04, forward_time=0.148, loss_ctc=70.087, loss_att=56.682, acc=0.705, loss=60.703, backward_time=1.035, grad_norm=149.974, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.429e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 11:52:19,107 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 11:52:37,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 11:52:40,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:59:04,406 (trainer:732) INFO: 44epoch:train:4101-4200batch: iter_time=1.307, forward_time=0.146, loss_ctc=78.980, loss_att=60.456, acc=0.713, loss=66.013, backward_time=1.053, grad_norm=139.801, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.429e-05, train_time=9.926 +[gpub002:0/64] 2023-07-13 12:01:20,686 (trainer:732) INFO: 44epoch:train:4201-4300batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=70.147, loss_att=53.310, acc=0.720, loss=58.362, backward_time=1.030, grad_norm=116.468, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 12:03:37,446 (trainer:732) INFO: 44epoch:train:4301-4400batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=76.582, loss_att=59.622, acc=0.708, loss=64.710, backward_time=1.032, grad_norm=126.272, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 12:05:53,280 (trainer:732) INFO: 44epoch:train:4401-4500batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=82.415, loss_att=56.209, acc=0.724, loss=64.071, backward_time=1.028, grad_norm=129.616, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.427e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:08:08,646 (trainer:732) INFO: 44epoch:train:4501-4600batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=65.252, loss_att=46.355, acc=0.705, loss=52.024, backward_time=1.024, grad_norm=116.158, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, 
train_time=2.707 +[gpub002:0/64] 2023-07-13 12:10:24,749 (trainer:732) INFO: 44epoch:train:4601-4700batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=80.559, loss_att=58.522, acc=0.706, loss=65.133, backward_time=1.030, grad_norm=160.477, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 12:12:40,507 (trainer:732) INFO: 44epoch:train:4701-4800batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=76.103, loss_att=53.892, acc=0.722, loss=60.555, backward_time=1.027, grad_norm=118.492, clip=100.000, loss_scale=2.629e+32, optim_step_time=0.181, optim0_lr0=5.425e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 12:14:56,545 (trainer:732) INFO: 44epoch:train:4801-4900batch: iter_time=1.162e-04, forward_time=0.146, loss_ctc=76.099, loss_att=61.644, acc=0.708, loss=65.980, backward_time=1.030, grad_norm=121.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:17:11,370 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 12:17:29,722 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 12:17:33,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:23:24,781 (trainer:732) INFO: 44epoch:train:4901-5000batch: iter_time=1.333, forward_time=0.145, loss_ctc=75.905, loss_att=57.847, acc=0.711, loss=63.264, backward_time=1.039, grad_norm=126.648, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=10.165 +[gpub002:0/64] 2023-07-13 12:25:45,045 (trainer:732) INFO: 44epoch:train:5001-5100batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=70.805, loss_att=55.156, acc=0.702, loss=59.850, backward_time=1.041, grad_norm=116.577, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=2.805 +[gpub002:0/64] 2023-07-13 12:28:00,589 (trainer:732) INFO: 44epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=78.881, loss_att=61.680, acc=0.692, loss=66.840, backward_time=1.027, grad_norm=123.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 12:30:16,542 (trainer:732) INFO: 44epoch:train:5201-5300batch: iter_time=1.268e-04, forward_time=0.147, loss_ctc=80.888, loss_att=54.612, acc=0.726, loss=62.495, backward_time=1.029, grad_norm=126.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 12:32:32,337 (trainer:732) INFO: 44epoch:train:5301-5400batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=65.520, loss_att=48.765, acc=0.704, 
loss=53.792, backward_time=1.028, grad_norm=115.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.421e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:34:59,810 (trainer:732) INFO: 44epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=77.603, loss_att=55.499, acc=0.696, loss=62.130, backward_time=1.038, grad_norm=131.812, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.421e-05, train_time=2.949 +[gpub002:0/64] 2023-07-13 12:37:15,536 (trainer:732) INFO: 44epoch:train:5501-5600batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.151, loss_att=52.695, acc=0.714, loss=58.832, backward_time=1.027, grad_norm=108.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.420e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 12:39:31,588 (trainer:732) INFO: 44epoch:train:5601-5700batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=77.041, loss_att=62.776, acc=0.692, loss=67.056, backward_time=1.030, grad_norm=117.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:41:47,589 (trainer:732) INFO: 44epoch:train:5701-5800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=75.964, loss_att=57.575, acc=0.711, loss=63.092, backward_time=1.029, grad_norm=131.182, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 12:42:32,777 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 12:42:51,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 12:42:54,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:48:25,155 (trainer:732) INFO: 44epoch:train:5801-5900batch: iter_time=1.344, forward_time=0.188, loss_ctc=69.328, loss_att=52.361, acc=0.722, loss=57.451, backward_time=1.039, grad_norm=118.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.418e-05, train_time=7.951 +[gpub002:0/64] 2023-07-13 12:50:41,794 (trainer:732) INFO: 44epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.145, loss_ctc=71.719, loss_att=54.368, acc=0.713, loss=59.573, backward_time=1.028, grad_norm=148.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 12:52:58,663 (trainer:732) INFO: 44epoch:train:6001-6100batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=82.110, loss_att=60.893, acc=0.716, loss=67.258, backward_time=1.032, grad_norm=162.632, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 12:55:14,739 
(trainer:732) INFO: 44epoch:train:6101-6200batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=76.069, loss_att=50.970, acc=0.722, loss=58.499, backward_time=1.030, grad_norm=127.718, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.416e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:57:31,152 (trainer:732) INFO: 44epoch:train:6201-6300batch: iter_time=1.203e-04, forward_time=0.147, loss_ctc=68.497, loss_att=51.883, acc=0.704, loss=56.867, backward_time=1.031, grad_norm=116.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 12:59:47,343 (trainer:732) INFO: 44epoch:train:6301-6400batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=79.729, loss_att=55.182, acc=0.711, loss=62.546, backward_time=1.030, grad_norm=126.405, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 13:02:03,500 (trainer:732) INFO: 44epoch:train:6401-6500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.743, loss_att=55.259, acc=0.729, loss=61.104, backward_time=1.030, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 13:04:21,478 (trainer:732) INFO: 44epoch:train:6501-6600batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=72.287, loss_att=56.876, acc=0.707, loss=61.499, backward_time=1.035, grad_norm=121.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.759 +[gpub002:0/64] 2023-07-13 13:06:07,952 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-13 13:06:26,685 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 13:06:30,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:10:55,875 (trainer:732) INFO: 44epoch:train:6601-6700batch: iter_time=2.456, forward_time=0.155, loss_ctc=76.904, loss_att=57.906, acc=0.720, loss=63.606, backward_time=1.050, grad_norm=144.697, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.413e-05, train_time=7.888 +[gpub002:0/64] 2023-07-13 13:13:12,276 (trainer:732) INFO: 44epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.144, loss_ctc=71.936, loss_att=56.557, acc=0.703, loss=61.171, backward_time=1.027, grad_norm=116.798, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 13:15:28,808 (trainer:732) INFO: 44epoch:train:6801-6900batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=80.914, loss_att=61.465, acc=0.700, loss=67.299, backward_time=1.028, grad_norm=148.620, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 13:17:44,586 (trainer:732) INFO: 44epoch:train:6901-7000batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=77.724, loss_att=54.794, acc=0.706, loss=61.673, backward_time=1.027, grad_norm=112.450, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.411e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 13:20:00,070 (trainer:732) INFO: 44epoch:train:7001-7100batch: iter_time=1.287e-04, forward_time=0.144, loss_ctc=69.685, loss_att=51.830, acc=0.704, loss=57.186, backward_time=1.026, grad_norm=123.047, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.410e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:22:20,661 (trainer:732) INFO: 44epoch:train:7101-7200batch: iter_time=1.207e-04, forward_time=0.165, loss_ctc=81.406, loss_att=55.579, acc=0.704, loss=63.327, backward_time=1.031, grad_norm=171.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.410e-05, train_time=2.812 +[gpub002:0/64] 2023-07-13 13:24:36,675 (trainer:732) INFO: 44epoch:train:7201-7300batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=73.378, loss_att=57.476, acc=0.708, loss=62.246, backward_time=1.030, grad_norm=116.929, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.409e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:26:53,257 (trainer:732) INFO: 44epoch:train:7301-7400batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=72.420, loss_att=58.355, acc=0.694, loss=62.575, backward_time=1.029, grad_norm=124.267, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.408e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:29:10,199 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub002:0/64] 2023-07-13 13:29:28,349 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 13:29:31,780 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:32:48,933 (trainer:732) INFO: 44epoch:train:7401-7500batch: iter_time=1.337, forward_time=0.199, loss_ctc=76.054, loss_att=55.067, acc=0.719, loss=61.363, backward_time=1.035, grad_norm=122.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.408e-05, train_time=7.113 +[gpub002:0/64] 2023-07-13 13:35:07,231 (trainer:732) INFO: 44epoch:train:7501-7600batch: iter_time=1.428e-04, forward_time=0.147, loss_ctc=72.133, loss_att=54.014, acc=0.717, loss=59.450, backward_time=1.036, grad_norm=138.467, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.407e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 13:37:23,964 (trainer:732) INFO: 44epoch:train:7601-7700batch: iter_time=9.887e-05, forward_time=0.145, loss_ctc=78.211, loss_att=59.906, acc=0.709, loss=65.397, backward_time=1.030, grad_norm=141.983, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.407e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 13:39:40,522 (trainer:732) INFO: 44epoch:train:7701-7800batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=81.364, loss_att=54.416, acc=0.730, loss=62.501, backward_time=1.028, grad_norm=135.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.406e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:41:55,986 (trainer:732) INFO: 44epoch:train:7801-7900batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=64.512, loss_att=47.472, acc=0.708, loss=52.584, backward_time=1.026, grad_norm=123.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:44:11,989 (trainer:732) INFO: 44epoch:train:7901-8000batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=77.259, loss_att=55.929, acc=0.709, loss=62.328, backward_time=1.029, grad_norm=131.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:46:37,906 (trainer:732) INFO: 44epoch:train:8001-8100batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=73.351, loss_att=53.304, acc=0.719, loss=59.318, backward_time=1.062, grad_norm=109.641, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.404e-05, train_time=2.918 +[gpub002:0/64] 2023-07-13 13:48:56,865 (trainer:732) INFO: 44epoch:train:8101-8200batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=74.136, loss_att=61.403, acc=0.701, loss=65.223, backward_time=1.035, grad_norm=118.234, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, 
optim0_lr0=5.403e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 13:51:17,363 (trainer:732) INFO: 44epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=76.981, loss_att=57.043, acc=0.721, loss=63.025, backward_time=1.034, grad_norm=133.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.403e-05, train_time=2.810 +[gpub002:0/64] 2023-07-13 13:52:22,069 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 13:52:40,279 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 13:52:43,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:59:20,241 (trainer:732) INFO: 44epoch:train:8301-8400batch: iter_time=2.263, forward_time=0.185, loss_ctc=68.303, loss_att=51.973, acc=0.725, loss=56.872, backward_time=1.048, grad_norm=114.275, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.402e-05, train_time=9.657 +[gpub002:0/64] 2023-07-13 14:01:37,614 (trainer:732) INFO: 44epoch:train:8401-8500batch: iter_time=9.752e-05, forward_time=0.144, loss_ctc=71.653, loss_att=56.730, acc=0.711, loss=61.207, backward_time=1.030, grad_norm=117.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.402e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 14:03:54,243 (trainer:732) INFO: 44epoch:train:8501-8600batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=84.550, loss_att=58.625, acc=0.720, loss=66.402, backward_time=1.031, grad_norm=124.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.401e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 14:06:11,819 (trainer:732) INFO: 44epoch:train:8601-8700batch: iter_time=1.454e-04, forward_time=0.146, loss_ctc=68.495, loss_att=47.376, acc=0.706, loss=53.712, backward_time=1.032, grad_norm=109.505, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 14:08:27,943 (trainer:732) INFO: 44epoch:train:8701-8800batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=74.817, loss_att=58.004, acc=0.704, loss=63.048, backward_time=1.031, grad_norm=116.827, clip=100.000, loss_scale=5.257e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 14:10:46,494 (trainer:732) INFO: 44epoch:train:8801-8900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=78.590, loss_att=52.950, acc=0.730, loss=60.642, backward_time=1.032, grad_norm=116.926, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.399e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 14:13:06,393 (trainer:732) INFO: 44epoch:train:8901-9000batch: iter_time=1.178e-04, forward_time=0.148, loss_ctc=72.075, 
loss_att=54.319, acc=0.717, loss=59.646, backward_time=1.037, grad_norm=123.080, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.798 +[gpub002:0/64] 2023-07-13 14:15:26,105 (trainer:732) INFO: 44epoch:train:9001-9100batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=79.231, loss_att=65.332, acc=0.701, loss=69.502, backward_time=1.047, grad_norm=112.101, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.794 +[gpub002:0/64] 2023-07-13 14:17:01,910 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 14:17:20,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 14:17:24,100 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 14:23:35,296 (trainer:732) INFO: 44epoch:train:9101-9200batch: iter_time=1.994, forward_time=0.152, loss_ctc=64.897, loss_att=47.438, acc=0.726, loss=52.676, backward_time=1.049, grad_norm=112.693, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.397e-05, train_time=9.784 +[gpub002:0/64] 2023-07-13 14:25:52,081 (trainer:732) INFO: 44epoch:train:9201-9300batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=69.610, loss_att=54.648, acc=0.710, loss=59.136, backward_time=1.028, grad_norm=111.652, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.396e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 14:28:09,358 (trainer:732) INFO: 44epoch:train:9301-9400batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=73.827, loss_att=60.050, acc=0.696, loss=64.183, backward_time=1.027, grad_norm=114.849, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.396e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 14:30:25,352 (trainer:732) INFO: 44epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=84.509, loss_att=55.869, acc=0.724, loss=64.461, backward_time=1.028, grad_norm=134.698, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.395e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 14:32:41,493 (trainer:732) INFO: 44epoch:train:9501-9600batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=65.351, loss_att=46.905, acc=0.708, loss=52.439, backward_time=1.027, grad_norm=114.409, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.395e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 14:34:57,531 (trainer:732) INFO: 44epoch:train:9601-9700batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=79.646, loss_att=58.448, acc=0.702, loss=64.808, backward_time=1.030, grad_norm=123.060, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.394e-05, train_time=2.721 +[gpub002:0/64] 
2023-07-13 14:37:13,895 (trainer:732) INFO: 44epoch:train:9701-9800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=74.065, loss_att=51.749, acc=0.719, loss=58.444, backward_time=1.028, grad_norm=130.614, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 14:39:29,723 (trainer:732) INFO: 44epoch:train:9801-9900batch: iter_time=1.294e-04, forward_time=0.146, loss_ctc=75.390, loss_att=60.899, acc=0.695, loss=65.246, backward_time=1.028, grad_norm=124.549, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 14:41:45,627 (trainer:732) INFO: 44epoch:train:9901-10000batch: iter_time=1.350e-04, forward_time=0.147, loss_ctc=75.203, loss_att=57.881, acc=0.713, loss=63.078, backward_time=1.029, grad_norm=127.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.392e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 14:54:13,290 (trainer:338) INFO: 44epoch results: [train] iter_time=0.229, forward_time=0.148, loss_ctc=75.300, loss_att=56.099, acc=0.709, loss=61.860, backward_time=1.033, grad_norm=128.444, clip=100.000, loss_scale=2.826e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=3.485, time=4 hours, 50 minutes and 36.87 seconds, total_count=410000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.588, cer_ctc=0.256, loss_att=36.048, acc=0.706, cer=0.321, wer=0.984, loss=38.310, time=6 minutes and 1.21 seconds, total_count=41998, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 14.4 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 14:54:29,899 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 14:54:29,920 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/37epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/39epoch.pth +[gpub002:0/64] 2023-07-13 14:54:29,921 (trainer:272) INFO: 45/50epoch started. Estimated time to finish: 1 day, 6 hours and 41 minutes +[gpub002:0/64] 2023-07-13 14:54:30,027 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 14:54:47,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 14:54:52,372 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:01:36,666 (trainer:732) INFO: 45epoch:train:1-100batch: iter_time=2.837, forward_time=0.166, loss_ctc=81.665, loss_att=63.090, acc=0.699, loss=68.662, backward_time=1.044, grad_norm=124.602, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.391e-05, train_time=8.533 +[gpub002:0/64] 2023-07-13 15:03:34,215 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 15:03:58,918 (trainer:732) INFO: 45epoch:train:101-200batch: iter_time=1.454e-04, forward_time=0.167, loss_ctc=78.164, loss_att=54.166, acc=0.720, loss=61.365, backward_time=1.033, grad_norm=145.604, clip=100.000, loss_scale=5.894e+32, optim_step_time=0.183, optim0_lr0=5.391e-05, train_time=2.845 +[gpub002:0/64] 2023-07-13 15:06:25,567 (trainer:732) INFO: 45epoch:train:201-300batch: iter_time=1.205e-04, forward_time=0.151, loss_ctc=68.261, loss_att=52.387, acc=0.704, loss=57.149, backward_time=1.044, grad_norm=106.301, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.390e-05, train_time=2.933 +[gpub002:0/64] 2023-07-13 15:08:51,283 (trainer:732) INFO: 45epoch:train:301-400batch: iter_time=1.249e-04, forward_time=0.151, loss_ctc=66.533, loss_att=51.447, acc=0.717, loss=55.973, backward_time=1.039, grad_norm=108.528, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.390e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:11:15,915 (trainer:732) INFO: 45epoch:train:401-500batch: iter_time=1.270e-04, forward_time=0.162, loss_ctc=75.336, loss_att=58.055, acc=0.709, loss=63.240, backward_time=1.031, grad_norm=122.397, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.389e-05, train_time=2.892 +[gpub002:0/64] 2023-07-13 15:13:39,370 (trainer:732) INFO: 45epoch:train:501-600batch: iter_time=1.190e-04, forward_time=0.175, loss_ctc=80.003, loss_att=63.160, acc=0.708, loss=68.213, backward_time=1.047, grad_norm=123.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.388e-05, train_time=2.868 +[gpub002:0/64] 2023-07-13 15:16:05,037 (trainer:732) INFO: 45epoch:train:601-700batch: iter_time=1.220e-04, forward_time=0.160, loss_ctc=66.333, loss_att=48.550, acc=0.711, loss=53.885, backward_time=1.041, grad_norm=123.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.388e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:18:34,274 (trainer:732) INFO: 45epoch:train:701-800batch: iter_time=1.331e-04, forward_time=0.169, loss_ctc=70.528, loss_att=52.616, acc=0.702, loss=57.989, 
backward_time=1.059, grad_norm=106.529, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.387e-05, train_time=2.985 +[gpub002:0/64] 2023-07-13 15:19:38,148 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 15:19:56,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 15:19:59,712 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:25:05,470 (trainer:732) INFO: 45epoch:train:801-900batch: iter_time=1.925, forward_time=0.196, loss_ctc=70.359, loss_att=50.574, acc=0.717, loss=56.510, backward_time=1.041, grad_norm=156.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.386e-05, train_time=7.823 +[gpub002:0/64] 2023-07-13 15:27:22,701 (trainer:732) INFO: 45epoch:train:901-1000batch: iter_time=1.363e-04, forward_time=0.147, loss_ctc=79.920, loss_att=62.286, acc=0.707, loss=67.576, backward_time=1.033, grad_norm=133.899, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.386e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 15:29:38,869 (trainer:732) INFO: 45epoch:train:1001-1100batch: iter_time=1.411e-04, forward_time=0.147, loss_ctc=73.445, loss_att=53.330, acc=0.704, loss=59.364, backward_time=1.030, grad_norm=132.822, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.385e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 15:31:56,763 (trainer:732) INFO: 45epoch:train:1101-1200batch: iter_time=4.801e-04, forward_time=0.146, loss_ctc=71.514, loss_att=56.740, acc=0.723, loss=61.172, backward_time=1.034, grad_norm=142.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.385e-05, train_time=2.758 +[gpub002:0/64] 2023-07-13 15:34:12,180 (trainer:732) INFO: 45epoch:train:1201-1300batch: iter_time=9.975e-05, forward_time=0.143, loss_ctc=64.208, loss_att=48.306, acc=0.716, loss=53.077, backward_time=1.025, grad_norm=147.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.384e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 15:36:49,716 (trainer:732) INFO: 45epoch:train:1301-1400batch: iter_time=4.440e-04, forward_time=0.316, loss_ctc=76.597, loss_att=58.015, acc=0.710, loss=63.590, backward_time=1.059, grad_norm=118.802, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.383e-05, train_time=3.150 +[gpub002:0/64] 2023-07-13 15:39:05,648 (trainer:732) INFO: 45epoch:train:1401-1500batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=75.275, loss_att=58.906, acc=0.714, loss=63.817, backward_time=1.028, grad_norm=121.195, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.383e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:41:21,653 (trainer:732) INFO: 
45epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.145, loss_ctc=67.840, loss_att=50.503, acc=0.699, loss=55.704, backward_time=1.028, grad_norm=110.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.382e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:43:03,648 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 15:43:21,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 15:43:25,360 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:47:55,266 (trainer:732) INFO: 45epoch:train:1601-1700batch: iter_time=2.508, forward_time=0.175, loss_ctc=74.468, loss_att=56.404, acc=0.712, loss=61.823, backward_time=1.040, grad_norm=106.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.381e-05, train_time=7.873 +[gpub002:0/64] 2023-07-13 15:50:12,061 (trainer:732) INFO: 45epoch:train:1701-1800batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=71.768, loss_att=53.722, acc=0.718, loss=59.136, backward_time=1.032, grad_norm=124.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.381e-05, train_time=2.736 +[gpub002:0/64] 2023-07-13 15:52:28,137 (trainer:732) INFO: 45epoch:train:1801-1900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=73.393, loss_att=54.484, acc=0.706, loss=60.157, backward_time=1.028, grad_norm=122.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 15:54:44,035 (trainer:732) INFO: 45epoch:train:1901-2000batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=72.218, loss_att=56.883, acc=0.716, loss=61.484, backward_time=1.029, grad_norm=99.995, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 15:56:59,596 (trainer:732) INFO: 45epoch:train:2001-2100batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.329, loss_att=47.171, acc=0.728, loss=52.319, backward_time=1.027, grad_norm=106.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.379e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 15:59:15,764 (trainer:732) INFO: 45epoch:train:2101-2200batch: iter_time=1.161e-04, forward_time=0.147, loss_ctc=78.537, loss_att=63.446, acc=0.702, loss=67.973, backward_time=1.030, grad_norm=116.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 16:01:31,536 (trainer:732) INFO: 45epoch:train:2201-2300batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=72.395, loss_att=52.310, acc=0.716, loss=58.335, backward_time=1.027, grad_norm=121.900, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:04:07,432 (trainer:732) INFO: 45epoch:train:2301-2400batch: iter_time=4.133e-04, forward_time=0.291, loss_ctc=64.940, loss_att=49.301, acc=0.698, loss=53.992, backward_time=1.048, grad_norm=119.660, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.195, optim0_lr0=5.377e-05, train_time=3.119 +[gpub002:0/64] 2023-07-13 16:06:26,316 (trainer:732) INFO: 45epoch:train:2401-2500batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=66.773, loss_att=49.837, acc=0.725, loss=54.918, backward_time=1.033, grad_norm=113.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=2.778 +[gpub002:0/64] 2023-07-13 16:06:47,569 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 16:07:05,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:07:09,245 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:07:09,246 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 16:07:09,252 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:13:45,255 (trainer:732) INFO: 45epoch:train:2501-2600batch: iter_time=2.932, forward_time=0.146, loss_ctc=75.245, loss_att=59.285, acc=0.705, loss=64.073, backward_time=1.045, grad_norm=143.838, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=8.779 +[gpub002:0/64] 2023-07-13 16:16:02,493 (trainer:732) INFO: 45epoch:train:2601-2700batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=71.169, loss_att=50.380, acc=0.721, loss=56.617, backward_time=1.031, grad_norm=126.623, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.375e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 16:18:18,405 (trainer:732) INFO: 45epoch:train:2701-2800batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.984, acc=0.701, loss=61.048, backward_time=1.030, grad_norm=110.866, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.375e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:20:34,197 (trainer:732) INFO: 45epoch:train:2801-2900batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=71.354, loss_att=54.781, acc=0.724, loss=59.753, backward_time=1.029, grad_norm=114.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.374e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:22:50,091 (trainer:732) INFO: 45epoch:train:2901-3000batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=63.704, loss_att=47.879, acc=0.721, loss=52.626, backward_time=1.031, grad_norm=113.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:25:08,892 (trainer:732) INFO: 45epoch:train:3001-3100batch: iter_time=1.287e-04, 
forward_time=0.146, loss_ctc=80.531, loss_att=66.219, acc=0.702, loss=70.512, backward_time=1.031, grad_norm=129.615, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.776 +[gpub002:0/64] 2023-07-13 16:27:26,710 (trainer:732) INFO: 45epoch:train:3101-3200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=74.038, loss_att=52.789, acc=0.722, loss=59.164, backward_time=1.033, grad_norm=120.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.372e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 16:29:45,474 (trainer:732) INFO: 45epoch:train:3201-3300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=64.774, loss_att=50.047, acc=0.692, loss=54.465, backward_time=1.031, grad_norm=159.741, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.372e-05, train_time=2.775 +[gpub002:0/64] 2023-07-13 16:30:34,615 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 16:30:52,965 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 16:30:56,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:36:51,770 (trainer:732) INFO: 45epoch:train:3301-3400batch: iter_time=1.357, forward_time=0.191, loss_ctc=74.879, loss_att=59.904, acc=0.707, loss=64.396, backward_time=1.042, grad_norm=130.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.371e-05, train_time=8.525 +[gpub002:0/64] 2023-07-13 16:39:07,620 (trainer:732) INFO: 45epoch:train:3401-3500batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=72.274, loss_att=48.909, acc=0.725, loss=55.919, backward_time=1.029, grad_norm=119.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:41:23,193 (trainer:732) INFO: 45epoch:train:3501-3600batch: iter_time=1.335e-04, forward_time=0.146, loss_ctc=73.254, loss_att=56.523, acc=0.710, loss=61.542, backward_time=1.027, grad_norm=136.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 16:43:38,990 (trainer:732) INFO: 45epoch:train:3601-3700batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=68.969, loss_att=55.099, acc=0.704, loss=59.260, backward_time=1.029, grad_norm=118.124, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.369e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:45:54,638 (trainer:732) INFO: 45epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.300, loss_att=51.250, acc=0.710, loss=56.065, backward_time=1.027, grad_norm=122.728, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.368e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 16:48:11,353 (trainer:732) INFO: 45epoch:train:3801-3900batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=79.048, loss_att=65.393, acc=0.693, loss=69.490, backward_time=1.029, grad_norm=138.605, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.368e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 16:50:30,597 (trainer:732) INFO: 45epoch:train:3901-4000batch: iter_time=1.266e-04, forward_time=0.166, loss_ctc=67.808, loss_att=48.262, acc=0.723, loss=54.126, backward_time=1.032, grad_norm=118.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.367e-05, train_time=2.785 +[gpub002:0/64] 2023-07-13 16:52:46,308 (trainer:732) INFO: 45epoch:train:4001-4100batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=66.270, loss_att=49.916, acc=0.699, loss=54.822, backward_time=1.029, grad_norm=112.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.367e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:54:21,064 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 16:54:39,212 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 16:54:42,637 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:00:13,505 (trainer:732) INFO: 45epoch:train:4101-4200batch: iter_time=1.377, forward_time=0.185, loss_ctc=69.143, loss_att=51.631, acc=0.718, loss=56.885, backward_time=1.038, grad_norm=123.088, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.184, optim0_lr0=5.366e-05, train_time=8.944 +[gpub002:0/64] 2023-07-13 17:02:31,518 (trainer:732) INFO: 45epoch:train:4201-4300batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=78.116, loss_att=61.080, acc=0.702, loss=66.191, backward_time=1.036, grad_norm=135.639, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.365e-05, train_time=2.760 +[gpub002:0/64] 2023-07-13 17:04:47,316 (trainer:732) INFO: 45epoch:train:4301-4400batch: iter_time=1.076e-04, forward_time=0.144, loss_ctc=73.863, loss_att=48.660, acc=0.731, loss=56.221, backward_time=1.028, grad_norm=111.515, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.365e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 17:07:04,029 (trainer:732) INFO: 45epoch:train:4401-4500batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=69.350, loss_att=55.129, acc=0.715, loss=59.396, backward_time=1.031, grad_norm=110.270, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.364e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 17:09:30,922 (trainer:732) INFO: 45epoch:train:4501-4600batch: iter_time=1.035e-04, forward_time=0.146, loss_ctc=64.703, 
loss_att=49.097, acc=0.721, loss=53.779, backward_time=1.044, grad_norm=107.915, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=2.938 +[gpub002:0/64] 2023-07-13 17:12:18,786 (trainer:732) INFO: 45epoch:train:4601-4700batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=73.234, loss_att=56.968, acc=0.715, loss=61.847, backward_time=1.061, grad_norm=124.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=3.357 +[gpub002:0/64] 2023-07-13 17:14:35,194 (trainer:732) INFO: 45epoch:train:4701-4800batch: iter_time=1.380e-04, forward_time=0.148, loss_ctc=76.055, loss_att=59.898, acc=0.712, loss=64.745, backward_time=1.032, grad_norm=120.870, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 17:16:50,801 (trainer:732) INFO: 45epoch:train:4801-4900batch: iter_time=1.407e-04, forward_time=0.146, loss_ctc=62.951, loss_att=45.591, acc=0.716, loss=50.799, backward_time=1.027, grad_norm=117.396, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 17:19:06,572 (trainer:732) INFO: 45epoch:train:4901-5000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=68.808, loss_att=51.751, acc=0.704, loss=56.868, backward_time=1.028, grad_norm=136.363, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.361e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 17:19:21,389 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 17:19:39,673 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 17:19:43,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:24:59,179 (trainer:732) INFO: 45epoch:train:5001-5100batch: iter_time=2.009, forward_time=0.147, loss_ctc=73.429, loss_att=58.346, acc=0.701, loss=62.871, backward_time=1.048, grad_norm=125.174, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=7.052 +[gpub002:0/64] 2023-07-13 17:27:16,621 (trainer:732) INFO: 45epoch:train:5101-5200batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=69.877, loss_att=49.329, acc=0.732, loss=55.493, backward_time=1.030, grad_norm=121.180, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=2.749 +[gpub002:0/64] 2023-07-13 17:29:32,165 (trainer:732) INFO: 45epoch:train:5201-5300batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=72.769, loss_att=55.528, acc=0.703, loss=60.701, backward_time=1.026, grad_norm=161.988, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.711 +[gpub002:0/64] 
2023-07-13 17:31:47,590 (trainer:732) INFO: 45epoch:train:5301-5400batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=71.449, loss_att=55.740, acc=0.713, loss=60.453, backward_time=1.025, grad_norm=134.067, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 17:34:03,500 (trainer:732) INFO: 45epoch:train:5401-5500batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=63.744, loss_att=47.642, acc=0.716, loss=52.473, backward_time=1.028, grad_norm=120.630, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.358e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 17:36:19,787 (trainer:732) INFO: 45epoch:train:5501-5600batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=80.084, loss_att=68.051, acc=0.690, loss=71.661, backward_time=1.028, grad_norm=136.204, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 17:38:35,468 (trainer:732) INFO: 45epoch:train:5601-5700batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=73.364, loss_att=52.280, acc=0.718, loss=58.605, backward_time=1.026, grad_norm=133.967, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 17:40:50,747 (trainer:732) INFO: 45epoch:train:5701-5800batch: iter_time=1.299e-04, forward_time=0.145, loss_ctc=64.239, loss_att=50.021, acc=0.693, loss=54.286, backward_time=1.026, grad_norm=113.559, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.356e-05, train_time=2.705 +[gpub002:0/64] 2023-07-13 17:41:39,369 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 17:41:57,470 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 17:42:01,071 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:49:41,785 (trainer:732) INFO: 45epoch:train:5801-5900batch: iter_time=1.389, forward_time=0.199, loss_ctc=66.566, loss_att=47.542, acc=0.725, loss=53.249, backward_time=1.047, grad_norm=123.650, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.185, optim0_lr0=5.355e-05, train_time=10.620 +[gpub002:0/64] 2023-07-13 17:51:58,518 (trainer:732) INFO: 45epoch:train:5901-6000batch: iter_time=1.269e-04, forward_time=0.148, loss_ctc=76.867, loss_att=60.070, acc=0.716, loss=65.109, backward_time=1.030, grad_norm=146.171, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.355e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 17:54:14,647 (trainer:732) INFO: 45epoch:train:6001-6100batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=73.164, loss_att=52.146, acc=0.712, loss=58.451, backward_time=1.031, 
grad_norm=118.081, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 17:56:25,548 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 17:56:30,992 (trainer:732) INFO: 45epoch:train:6101-6200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=72.297, loss_att=55.985, acc=0.727, loss=60.878, backward_time=1.033, grad_norm=127.067, clip=100.000, loss_scale=6.358e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 17:58:48,413 (trainer:732) INFO: 45epoch:train:6201-6300batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=62.624, loss_att=46.194, acc=0.728, loss=51.123, backward_time=1.042, grad_norm=124.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.353e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 18:01:05,345 (trainer:732) INFO: 45epoch:train:6301-6400batch: iter_time=9.910e-05, forward_time=0.146, loss_ctc=75.500, loss_att=57.283, acc=0.714, loss=62.749, backward_time=1.031, grad_norm=118.320, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 18:03:22,316 (trainer:732) INFO: 45epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=70.999, loss_att=56.502, acc=0.721, loss=60.851, backward_time=1.031, grad_norm=119.958, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 18:05:38,177 (trainer:732) INFO: 45epoch:train:6501-6600batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=66.854, loss_att=49.259, acc=0.703, loss=54.538, backward_time=1.027, grad_norm=102.459, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 18:07:21,216 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 18:07:39,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 18:07:43,088 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 18:12:41,336 (trainer:732) INFO: 45epoch:train:6601-6700batch: iter_time=1.633, forward_time=0.145, loss_ctc=73.582, loss_att=56.482, acc=0.708, loss=61.612, backward_time=1.040, grad_norm=136.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=8.463 +[gpub002:0/64] 2023-07-13 18:14:58,720 (trainer:732) INFO: 45epoch:train:6701-6800batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=70.722, loss_att=52.661, acc=0.720, loss=58.080, backward_time=1.033, grad_norm=127.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.350e-05, train_time=2.747 +[gpub002:0/64] 2023-07-13 18:17:17,712 (trainer:732) INFO: 45epoch:train:6801-6900batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=73.147, loss_att=53.137, acc=0.710, loss=59.140, backward_time=1.047, grad_norm=130.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.780 +[gpub002:0/64] 2023-07-13 18:19:40,571 (trainer:732) INFO: 45epoch:train:6901-7000batch: iter_time=1.008e-04, forward_time=0.145, loss_ctc=70.596, loss_att=56.317, acc=0.715, loss=60.601, backward_time=1.032, grad_norm=138.486, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.857 +[gpub002:0/64] 2023-07-13 18:22:00,623 (trainer:732) INFO: 45epoch:train:7001-7100batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.705, loss_att=46.602, acc=0.721, loss=51.733, backward_time=1.033, grad_norm=101.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=2.801 +[gpub002:0/64] 2023-07-13 18:24:43,695 (trainer:732) INFO: 45epoch:train:7101-7200batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=78.651, loss_att=65.443, acc=0.692, loss=69.405, backward_time=1.056, grad_norm=153.875, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=3.261 +[gpub002:0/64] 2023-07-13 18:27:00,050 (trainer:732) INFO: 45epoch:train:7201-7300batch: iter_time=1.117e-04, forward_time=0.146, loss_ctc=69.650, loss_att=51.477, acc=0.714, loss=56.929, backward_time=1.030, grad_norm=126.353, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.347e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 18:29:15,908 (trainer:732) INFO: 45epoch:train:7301-7400batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=64.906, loss_att=48.559, acc=0.703, loss=53.463, backward_time=1.030, grad_norm=131.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.346e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 18:31:31,376 (trainer:732) INFO: 45epoch:train:7401-7500batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=66.285, loss_att=49.804, acc=0.719, loss=54.748, backward_time=1.027, grad_norm=119.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.346e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 18:31:39,809 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 18:31:58,133 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 18:32:01,560 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 18:38:55,570 (trainer:732) INFO: 45epoch:train:7501-7600batch: iter_time=2.968, forward_time=0.205, loss_ctc=74.821, loss_att=58.310, acc=0.701, loss=63.263, backward_time=1.049, grad_norm=140.580, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.345e-05, train_time=8.883 +[gpub002:0/64] 2023-07-13 18:41:12,994 (trainer:732) INFO: 45epoch:train:7601-7700batch: iter_time=1.401e-04, forward_time=0.146, loss_ctc=70.499, loss_att=49.199, acc=0.733, loss=55.589, backward_time=1.030, grad_norm=115.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.749 +[gpub002:0/64] 2023-07-13 18:43:29,468 (trainer:732) INFO: 45epoch:train:7701-7800batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=72.586, loss_att=55.027, acc=0.709, loss=60.295, backward_time=1.027, grad_norm=112.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 18:45:45,793 (trainer:732) INFO: 45epoch:train:7801-7900batch: iter_time=1.293e-04, forward_time=0.144, loss_ctc=72.517, loss_att=54.985, acc=0.716, loss=60.244, backward_time=1.025, grad_norm=117.890, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 18:48:24,077 (trainer:732) INFO: 45epoch:train:7901-8000batch: iter_time=1.442e-04, forward_time=0.145, loss_ctc=63.111, loss_att=46.756, acc=0.716, loss=51.662, backward_time=1.057, grad_norm=120.211, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=3.165 +[gpub002:0/64] 2023-07-13 18:50:40,521 (trainer:732) INFO: 45epoch:train:8001-8100batch: iter_time=1.278e-04, forward_time=0.147, loss_ctc=78.732, loss_att=66.222, acc=0.695, loss=69.975, backward_time=1.031, grad_norm=170.247, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.342e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 18:52:56,982 (trainer:732) INFO: 45epoch:train:8101-8200batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=71.237, 
loss_att=50.828, acc=0.721, loss=56.951, backward_time=1.031, grad_norm=144.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 18:55:12,607 (trainer:732) INFO: 45epoch:train:8201-8300batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=63.342, loss_att=47.314, acc=0.705, loss=52.123, backward_time=1.027, grad_norm=120.140, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 18:56:02,679 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 18:56:21,043 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 18:56:24,481 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 19:01:54,411 (trainer:732) INFO: 45epoch:train:8301-8400batch: iter_time=1.403, forward_time=0.145, loss_ctc=67.802, loss_att=47.923, acc=0.722, loss=53.886, backward_time=1.041, grad_norm=103.039, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=8.036 +[gpub002:0/64] 2023-07-13 19:04:12,062 (trainer:732) INFO: 45epoch:train:8401-8500batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=77.578, loss_att=59.511, acc=0.713, loss=64.931, backward_time=1.028, grad_norm=113.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=2.753 +[gpub002:0/64] 2023-07-13 19:06:36,114 (trainer:732) INFO: 45epoch:train:8501-8600batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=72.515, loss_att=51.973, acc=0.709, loss=58.136, backward_time=1.037, grad_norm=112.074, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.339e-05, train_time=2.881 +[gpub002:0/64] 2023-07-13 19:08:58,016 (trainer:732) INFO: 45epoch:train:8601-8700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=70.363, loss_att=55.476, acc=0.722, loss=59.942, backward_time=1.052, grad_norm=139.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.838 +[gpub002:0/64] 2023-07-13 19:11:23,653 (trainer:732) INFO: 45epoch:train:8701-8800batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=61.325, loss_att=45.766, acc=0.718, loss=50.433, backward_time=1.067, grad_norm=128.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.913 +[gpub002:0/64] 2023-07-13 19:13:45,579 (trainer:732) INFO: 45epoch:train:8801-8900batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=75.654, loss_att=57.454, acc=0.712, loss=62.914, backward_time=1.033, grad_norm=163.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.838 +[gpub002:0/64] 
2023-07-13 19:16:01,117 (trainer:732) INFO: 45epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.877, loss_att=58.040, acc=0.706, loss=62.191, backward_time=1.026, grad_norm=150.120, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 19:18:23,985 (trainer:732) INFO: 45epoch:train:9001-9100batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=65.441, loss_att=49.344, acc=0.702, loss=54.173, backward_time=1.036, grad_norm=131.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.336e-05, train_time=2.857 +[gpub002:0/64] 2023-07-13 19:20:22,038 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 19:20:40,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 19:20:44,279 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 19:26:50,491 (trainer:732) INFO: 45epoch:train:9101-9200batch: iter_time=1.709, forward_time=0.179, loss_ctc=67.413, loss_att=51.765, acc=0.712, loss=56.459, backward_time=1.050, grad_norm=122.252, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.335e-05, train_time=10.130 +[gpub002:0/64] 2023-07-13 19:29:07,652 (trainer:732) INFO: 45epoch:train:9201-9300batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=76.983, loss_att=59.756, acc=0.700, loss=64.924, backward_time=1.032, grad_norm=139.788, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.335e-05, train_time=2.742 +[gpub002:0/64] 2023-07-13 19:31:25,100 (trainer:732) INFO: 45epoch:train:9301-9400batch: iter_time=9.813e-05, forward_time=0.145, loss_ctc=72.296, loss_att=47.126, acc=0.734, loss=54.677, backward_time=1.028, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.750 +[gpub002:0/64] 2023-07-13 19:33:41,318 (trainer:732) INFO: 45epoch:train:9401-9500batch: iter_time=9.632e-05, forward_time=0.145, loss_ctc=71.884, loss_att=54.495, acc=0.719, loss=59.712, backward_time=1.027, grad_norm=117.673, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 19:35:57,136 (trainer:732) INFO: 45epoch:train:9501-9600batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=64.752, loss_att=48.917, acc=0.719, loss=53.668, backward_time=1.026, grad_norm=117.335, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.333e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 19:38:13,202 (trainer:732) INFO: 45epoch:train:9601-9700batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=71.752, loss_att=55.035, acc=0.712, loss=60.050, backward_time=1.028, 
grad_norm=125.109, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 19:40:28,939 (trainer:732) INFO: 45epoch:train:9701-9800batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=75.944, loss_att=60.847, acc=0.702, loss=65.376, backward_time=1.027, grad_norm=149.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 19:42:44,392 (trainer:732) INFO: 45epoch:train:9801-9900batch: iter_time=1.022e-04, forward_time=0.143, loss_ctc=61.928, loss_att=45.638, acc=0.717, loss=50.525, backward_time=1.025, grad_norm=106.833, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.331e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 19:44:59,782 (trainer:732) INFO: 45epoch:train:9901-10000batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=68.171, loss_att=50.374, acc=0.709, loss=55.713, backward_time=1.024, grad_norm=116.675, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.330e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 19:59:12,558 (trainer:338) INFO: 45epoch results: [train] iter_time=0.241, forward_time=0.153, loss_ctc=71.101, loss_att=53.886, acc=0.712, loss=59.051, backward_time=1.034, grad_norm=125.414, clip=100.000, loss_scale=3.957e+32, optim_step_time=0.183, optim0_lr0=5.361e-05, train_time=3.486, time=4 hours, 50 minutes and 52.55 seconds, total_count=420000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.917, cer_ctc=0.255, loss_att=35.722, acc=0.704, cer=0.334, wer=0.986, loss=37.881, time=7 minutes and 50.24 seconds, total_count=43010, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 59.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 19:59:28,695 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/29epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/40epoch.pth +[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:272) INFO: 46/50epoch started. Estimated time to finish: 1 day, 1 hour and 33 minutes +[gpub002:0/64] 2023-07-13 19:59:28,792 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 19:59:46,914 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 19:59:50,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 20:04:40,186 (trainer:732) INFO: 46epoch:train:1-100batch: iter_time=1.647, forward_time=0.188, loss_ctc=65.883, loss_att=56.587, acc=0.712, loss=59.376, backward_time=1.048, grad_norm=121.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.330e-05, train_time=6.227 +[gpub002:0/64] 2023-07-13 20:06:57,011 (trainer:732) INFO: 46epoch:train:101-200batch: iter_time=1.304e-04, forward_time=0.146, loss_ctc=73.119, loss_att=52.389, acc=0.708, loss=58.608, backward_time=1.033, grad_norm=126.577, clip=100.000, loss_scale=3.375e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 20:09:13,490 (trainer:732) INFO: 46epoch:train:201-300batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=70.101, loss_att=49.203, acc=0.726, loss=55.472, backward_time=1.027, grad_norm=136.978, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 20:11:29,153 (trainer:732) INFO: 46epoch:train:301-400batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=67.230, loss_att=54.634, acc=0.706, loss=58.413, backward_time=1.026, grad_norm=117.807, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.328e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 20:13:47,110 (trainer:732) INFO: 46epoch:train:401-500batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.084, loss_att=51.893, acc=0.716, loss=56.751, backward_time=1.029, grad_norm=120.025, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.759 +[gpub002:0/64] 2023-07-13 20:16:12,488 (trainer:732) INFO: 46epoch:train:501-600batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=81.257, loss_att=58.875, acc=0.709, loss=65.590, backward_time=1.037, grad_norm=140.480, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.907 +[gpub002:0/64] 2023-07-13 20:18:37,169 (trainer:732) INFO: 46epoch:train:601-700batch: iter_time=1.257e-04, forward_time=0.145, loss_ctc=62.945, loss_att=44.662, acc=0.715, loss=50.147, backward_time=1.036, grad_norm=124.728, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, train_time=2.893 +[gpub002:0/64] 2023-07-13 20:21:00,820 (trainer:732) INFO: 46epoch:train:701-800batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=73.224, loss_att=50.781, acc=0.730, loss=57.514, backward_time=1.034, grad_norm=141.813, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, 
train_time=2.873 +[gpub002:0/64] 2023-07-13 20:22:00,295 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 20:22:18,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 20:22:21,544 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 20:28:54,861 (trainer:732) INFO: 46epoch:train:801-900batch: iter_time=3.233, forward_time=0.196, loss_ctc=66.161, loss_att=51.935, acc=0.720, loss=56.203, backward_time=1.045, grad_norm=126.426, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.190, optim0_lr0=5.325e-05, train_time=9.480 +[gpub002:0/64] 2023-07-13 20:31:12,985 (trainer:732) INFO: 46epoch:train:901-1000batch: iter_time=0.001, forward_time=0.153, loss_ctc=74.152, loss_att=55.993, acc=0.706, loss=61.441, backward_time=1.032, grad_norm=120.946, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.763 +[gpub002:0/64] 2023-07-13 20:33:29,468 (trainer:732) INFO: 46epoch:train:1001-1100batch: iter_time=0.001, forward_time=0.146, loss_ctc=65.898, loss_att=48.417, acc=0.733, loss=53.661, backward_time=1.032, grad_norm=117.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 20:36:03,141 (trainer:732) INFO: 46epoch:train:1101-1200batch: iter_time=2.935e-04, forward_time=0.273, loss_ctc=68.912, loss_att=53.279, acc=0.714, loss=57.969, backward_time=1.053, grad_norm=151.138, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.189, optim0_lr0=5.323e-05, train_time=3.072 +[gpub002:0/64] 2023-07-13 20:38:19,536 (trainer:732) INFO: 46epoch:train:1201-1300batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=70.800, loss_att=51.181, acc=0.720, loss=57.067, backward_time=1.029, grad_norm=152.229, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.323e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 20:40:36,527 (trainer:732) INFO: 46epoch:train:1301-1400batch: iter_time=1.269e-04, forward_time=0.150, loss_ctc=73.810, loss_att=54.374, acc=0.723, loss=60.205, backward_time=1.030, grad_norm=167.798, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.322e-05, train_time=2.740 +[gpub002:0/64] 2023-07-13 20:42:52,466 (trainer:732) INFO: 46epoch:train:1401-1500batch: iter_time=1.267e-04, forward_time=0.146, loss_ctc=68.079, loss_att=49.216, acc=0.708, loss=54.875, backward_time=1.026, grad_norm=209.029, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.321e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 20:45:08,125 (trainer:732) INFO: 46epoch:train:1501-1600batch: iter_time=1.277e-04, forward_time=0.144, loss_ctc=69.590, loss_att=48.587, acc=0.725, loss=54.888, 
backward_time=1.026, grad_norm=143.890, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.321e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 20:46:58,016 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 20:47:16,447 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 20:47:19,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 20:53:32,464 (trainer:732) INFO: 46epoch:train:1601-1700batch: iter_time=3.523, forward_time=0.178, loss_ctc=68.290, loss_att=51.711, acc=0.723, loss=56.685, backward_time=1.040, grad_norm=114.130, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.320e-05, train_time=10.086 +[gpub002:0/64] 2023-07-13 20:55:49,050 (trainer:732) INFO: 46epoch:train:1701-1800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.254, loss_att=57.546, acc=0.707, loss=61.058, backward_time=1.030, grad_norm=147.850, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.320e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 20:58:04,545 (trainer:732) INFO: 46epoch:train:1801-1900batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=67.059, loss_att=46.493, acc=0.719, loss=52.663, backward_time=1.026, grad_norm=125.550, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.319e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 21:00:20,791 (trainer:732) INFO: 46epoch:train:1901-2000batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=67.667, loss_att=50.905, acc=0.727, loss=55.934, backward_time=1.029, grad_norm=127.276, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 21:02:36,612 (trainer:732) INFO: 46epoch:train:2001-2100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=67.126, loss_att=48.506, acc=0.720, loss=54.092, backward_time=1.028, grad_norm=118.373, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 21:04:53,111 (trainer:732) INFO: 46epoch:train:2101-2200batch: iter_time=1.325e-04, forward_time=0.148, loss_ctc=72.830, loss_att=55.843, acc=0.709, loss=60.939, backward_time=1.030, grad_norm=138.565, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 21:07:08,871 (trainer:732) INFO: 46epoch:train:2201-2300batch: iter_time=1.351e-04, forward_time=0.145, loss_ctc=69.103, loss_att=52.736, acc=0.704, loss=57.646, backward_time=1.028, grad_norm=122.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 21:09:24,380 (trainer:732) INFO: 
46epoch:train:2301-2400batch: iter_time=1.543e-04, forward_time=0.145, loss_ctc=63.429, loss_att=44.374, acc=0.725, loss=50.090, backward_time=1.027, grad_norm=140.538, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.316e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 21:11:48,859 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 21:12:06,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 21:12:10,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 21:17:51,096 (trainer:732) INFO: 46epoch:train:2401-2500batch: iter_time=1.340, forward_time=0.145, loss_ctc=74.058, loss_att=50.477, acc=0.726, loss=57.551, backward_time=1.066, grad_norm=158.986, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.315e-05, train_time=10.134 +[gpub002:0/64] 2023-07-13 21:20:12,148 (trainer:732) INFO: 46epoch:train:2501-2600batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=73.951, loss_att=59.611, acc=0.702, loss=63.913, backward_time=1.038, grad_norm=135.632, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.315e-05, train_time=2.821 +[gpub002:0/64] 2023-07-13 21:22:28,950 (trainer:732) INFO: 46epoch:train:2601-2700batch: iter_time=1.170e-04, forward_time=0.143, loss_ctc=65.472, loss_att=47.308, acc=0.721, loss=52.757, backward_time=1.030, grad_norm=119.313, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.736 +[gpub002:0/64] 2023-07-13 21:24:44,625 (trainer:732) INFO: 46epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.360, loss_att=52.584, acc=0.716, loss=57.916, backward_time=1.028, grad_norm=125.573, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 21:27:00,394 (trainer:732) INFO: 46epoch:train:2801-2900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=66.237, loss_att=48.990, acc=0.715, loss=54.164, backward_time=1.028, grad_norm=119.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.313e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 21:29:16,078 (trainer:732) INFO: 46epoch:train:2901-3000batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=69.138, loss_att=52.011, acc=0.715, loss=57.149, backward_time=1.028, grad_norm=140.082, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 21:31:31,702 (trainer:732) INFO: 46epoch:train:3001-3100batch: iter_time=1.423e-04, forward_time=0.145, loss_ctc=69.682, loss_att=52.735, acc=0.704, loss=57.819, backward_time=1.028, grad_norm=112.062, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 21:33:47,412 (trainer:732) INFO: 46epoch:train:3101-3200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=67.778, loss_att=47.656, acc=0.722, loss=53.693, backward_time=1.027, grad_norm=128.979, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 21:36:02,959 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 21:36:02,969 (trainer:732) INFO: 46epoch:train:3201-3300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=74.814, loss_att=52.797, acc=0.718, loss=59.402, backward_time=1.028, grad_norm=125.040, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 21:36:49,099 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 21:37:07,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 21:37:10,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 21:42:10,676 (trainer:732) INFO: 46epoch:train:3301-3400batch: iter_time=1.578, forward_time=0.207, loss_ctc=67.011, loss_att=54.152, acc=0.720, loss=58.010, backward_time=1.045, grad_norm=167.072, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.310e-05, train_time=7.354 +[gpub002:0/64] 2023-07-13 21:44:26,985 (trainer:732) INFO: 46epoch:train:3401-3500batch: iter_time=1.279e-04, forward_time=0.146, loss_ctc=73.361, loss_att=54.970, acc=0.712, loss=60.487, backward_time=1.029, grad_norm=126.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.309e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 21:46:42,557 (trainer:732) INFO: 46epoch:train:3501-3600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=64.685, loss_att=47.711, acc=0.736, loss=52.804, backward_time=1.026, grad_norm=107.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.309e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 21:48:58,259 (trainer:732) INFO: 46epoch:train:3601-3700batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=68.193, loss_att=53.136, acc=0.717, loss=57.653, backward_time=1.026, grad_norm=121.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.308e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 21:51:15,506 (trainer:732) INFO: 46epoch:train:3701-3800batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=69.434, loss_att=50.735, acc=0.724, loss=56.345, backward_time=1.030, grad_norm=108.644, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.308e-05, train_time=2.744 
+[gpub002:0/64] 2023-07-13 21:53:38,632 (trainer:732) INFO: 46epoch:train:3801-3900batch: iter_time=1.193e-04, forward_time=0.157, loss_ctc=71.575, loss_att=53.589, acc=0.723, loss=58.985, backward_time=1.046, grad_norm=142.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.307e-05, train_time=2.863 +[gpub002:0/64] 2023-07-13 21:55:57,708 (trainer:732) INFO: 46epoch:train:3901-4000batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=67.232, loss_att=49.694, acc=0.711, loss=54.956, backward_time=1.033, grad_norm=118.653, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.306e-05, train_time=2.781 +[gpub002:0/64] 2023-07-13 21:58:13,850 (trainer:732) INFO: 46epoch:train:4001-4100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=69.507, loss_att=48.626, acc=0.726, loss=54.891, backward_time=1.030, grad_norm=115.628, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.306e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 22:00:07,007 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 22:00:25,302 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 22:00:28,746 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 22:06:11,914 (trainer:732) INFO: 46epoch:train:4101-4200batch: iter_time=3.280, forward_time=0.182, loss_ctc=67.564, loss_att=50.602, acc=0.727, loss=55.691, backward_time=1.048, grad_norm=124.825, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.305e-05, train_time=9.561 +[gpub002:0/64] 2023-07-13 22:08:28,453 (trainer:732) INFO: 46epoch:train:4201-4300batch: iter_time=1.204e-04, forward_time=0.144, loss_ctc=69.532, loss_att=59.594, acc=0.704, loss=62.576, backward_time=1.031, grad_norm=120.778, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.305e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 22:10:44,255 (trainer:732) INFO: 46epoch:train:4301-4400batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=67.758, loss_att=47.109, acc=0.721, loss=53.304, backward_time=1.027, grad_norm=107.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.304e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 22:12:59,720 (trainer:732) INFO: 46epoch:train:4401-4500batch: iter_time=1.420e-04, forward_time=0.144, loss_ctc=68.105, loss_att=51.162, acc=0.727, loss=56.245, backward_time=1.025, grad_norm=134.544, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.303e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 22:15:15,435 (trainer:732) INFO: 46epoch:train:4501-4600batch: iter_time=1.536e-04, forward_time=0.146, loss_ctc=67.447, loss_att=49.089, acc=0.720, loss=54.596, 
backward_time=1.028, grad_norm=123.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.303e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 22:17:31,236 (trainer:732) INFO: 46epoch:train:4601-4700batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=72.378, loss_att=55.337, acc=0.710, loss=60.450, backward_time=1.028, grad_norm=127.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.302e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 22:19:46,893 (trainer:732) INFO: 46epoch:train:4701-4800batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=68.304, loss_att=52.814, acc=0.705, loss=57.461, backward_time=1.027, grad_norm=115.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.302e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 22:22:02,247 (trainer:732) INFO: 46epoch:train:4801-4900batch: iter_time=1.396e-04, forward_time=0.145, loss_ctc=62.431, loss_att=43.727, acc=0.729, loss=49.338, backward_time=1.025, grad_norm=120.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.301e-05, train_time=2.707 +[gpub002:0/64] 2023-07-13 22:24:18,146 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 22:24:36,789 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 22:24:40,263 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 22:31:08,903 (trainer:732) INFO: 46epoch:train:4901-5000batch: iter_time=1.286, forward_time=0.146, loss_ctc=71.786, loss_att=49.940, acc=0.728, loss=56.494, backward_time=1.039, grad_norm=110.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=10.932 +[gpub002:0/64] 2023-07-13 22:33:27,419 (trainer:732) INFO: 46epoch:train:5001-5100batch: iter_time=1.223e-04, forward_time=0.149, loss_ctc=64.751, loss_att=53.246, acc=0.721, loss=56.697, backward_time=1.037, grad_norm=99.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.300e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 22:35:43,951 (trainer:732) INFO: 46epoch:train:5101-5200batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=72.158, loss_att=51.378, acc=0.713, loss=57.612, backward_time=1.028, grad_norm=111.287, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 22:37:59,756 (trainer:732) INFO: 46epoch:train:5201-5300batch: iter_time=1.149e-04, forward_time=0.145, loss_ctc=67.483, loss_att=47.864, acc=0.735, loss=53.750, backward_time=1.028, grad_norm=125.499, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 22:40:23,841 (trainer:732) INFO: 
46epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=65.944, loss_att=51.781, acc=0.714, loss=56.030, backward_time=1.042, grad_norm=118.487, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.298e-05, train_time=2.881 +[gpub002:0/64] 2023-07-13 22:42:39,494 (trainer:732) INFO: 46epoch:train:5401-5500batch: iter_time=1.323e-04, forward_time=0.145, loss_ctc=68.524, loss_att=52.761, acc=0.713, loss=57.490, backward_time=1.028, grad_norm=118.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 22:44:55,465 (trainer:732) INFO: 46epoch:train:5501-5600batch: iter_time=1.347e-04, forward_time=0.146, loss_ctc=75.505, loss_att=56.510, acc=0.706, loss=62.208, backward_time=1.031, grad_norm=124.408, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 22:47:10,920 (trainer:732) INFO: 46epoch:train:5601-5700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=62.459, loss_att=43.488, acc=0.724, loss=49.180, backward_time=1.026, grad_norm=107.132, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.296e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 22:49:26,868 (trainer:732) INFO: 46epoch:train:5701-5800batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=72.636, loss_att=50.457, acc=0.737, loss=57.111, backward_time=1.030, grad_norm=130.378, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.296e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 22:50:15,326 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 22:50:33,588 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 22:50:37,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 22:58:22,565 (trainer:732) INFO: 46epoch:train:5801-5900batch: iter_time=1.406, forward_time=0.196, loss_ctc=70.088, loss_att=56.947, acc=0.697, loss=60.889, backward_time=1.041, grad_norm=108.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.295e-05, train_time=10.713 +[gpub002:0/64] 2023-07-13 23:00:39,434 (trainer:732) INFO: 46epoch:train:5901-6000batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=69.077, loss_att=49.338, acc=0.718, loss=55.259, backward_time=1.029, grad_norm=127.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.295e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 23:02:54,972 (trainer:732) INFO: 46epoch:train:6001-6100batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=66.432, loss_att=45.825, acc=0.735, loss=52.007, backward_time=1.025, grad_norm=111.877, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.294e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 23:05:10,758 (trainer:732) INFO: 46epoch:train:6101-6200batch: iter_time=1.257e-04, forward_time=0.146, loss_ctc=68.644, loss_att=53.212, acc=0.712, loss=57.841, backward_time=1.028, grad_norm=126.634, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.293e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 23:07:27,287 (trainer:732) INFO: 46epoch:train:6201-6300batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=68.525, loss_att=54.107, acc=0.705, loss=58.432, backward_time=1.032, grad_norm=130.338, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.293e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 23:09:42,933 (trainer:732) INFO: 46epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=73.030, loss_att=55.687, acc=0.706, loss=60.890, backward_time=1.028, grad_norm=122.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 23:11:58,473 (trainer:732) INFO: 46epoch:train:6401-6500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=62.489, loss_att=42.039, acc=0.728, loss=48.174, backward_time=1.027, grad_norm=110.857, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 23:14:18,395 (trainer:732) INFO: 46epoch:train:6501-6600batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.293, loss_att=50.054, acc=0.730, loss=56.126, backward_time=1.033, grad_norm=116.507, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.291e-05, train_time=2.798 +[gpub002:0/64] 2023-07-13 23:16:08,051 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 23:16:26,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 23:16:29,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 23:21:49,471 (trainer:732) INFO: 46epoch:train:6601-6700batch: iter_time=3.060, forward_time=0.146, loss_ctc=72.928, loss_att=58.616, acc=0.707, loss=62.910, backward_time=1.044, grad_norm=131.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=9.021 +[gpub002:0/64] 2023-07-13 23:24:06,689 (trainer:732) INFO: 46epoch:train:6701-6800batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=72.161, loss_att=56.066, acc=0.711, loss=60.894, backward_time=1.031, grad_norm=141.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=2.744 +[gpub002:0/64] 2023-07-13 23:26:23,450 (trainer:732) INFO: 46epoch:train:6801-6900batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=66.501, loss_att=47.899, acc=0.733, loss=53.480, backward_time=1.029, grad_norm=134.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 23:28:50,171 (trainer:732) INFO: 46epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=67.438, loss_att=53.891, acc=0.724, loss=57.955, backward_time=1.034, grad_norm=109.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.934 +[gpub002:0/64] 2023-07-13 23:31:05,996 (trainer:732) INFO: 46epoch:train:7001-7100batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=66.567, loss_att=49.018, acc=0.721, loss=54.283, backward_time=1.028, grad_norm=118.826, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.288e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 23:33:21,934 (trainer:732) INFO: 46epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=73.747, loss_att=54.406, acc=0.722, loss=60.208, backward_time=1.029, grad_norm=143.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 23:35:37,952 (trainer:732) INFO: 46epoch:train:7201-7300batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=64.890, loss_att=47.420, acc=0.716, loss=52.661, backward_time=1.031, grad_norm=122.847, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 23:37:54,841 (trainer:732) INFO: 46epoch:train:7301-7400batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.972, loss_att=48.798, acc=0.732, loss=55.150, backward_time=1.031, grad_norm=107.927, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, 
optim0_lr0=5.286e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 23:40:11,039 (trainer:732) INFO: 46epoch:train:7401-7500batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=72.249, loss_att=55.096, acc=0.722, loss=60.242, backward_time=1.030, grad_norm=116.626, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.286e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 23:40:12,748 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 23:40:30,664 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 23:40:34,102 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 23:46:45,686 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 23:48:38,986 (trainer:732) INFO: 46epoch:train:7501-7600batch: iter_time=1.321, forward_time=0.191, loss_ctc=65.367, loss_att=55.036, acc=0.722, loss=58.135, backward_time=1.048, grad_norm=119.689, clip=100.000, loss_scale=3.775e+32, optim_step_time=0.186, optim0_lr0=5.285e-05, train_time=10.159 +[gpub002:0/64] 2023-07-13 23:50:55,149 (trainer:732) INFO: 46epoch:train:7601-7700batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=72.022, loss_att=51.304, acc=0.717, loss=57.519, backward_time=1.028, grad_norm=133.215, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 23:53:11,294 (trainer:732) INFO: 46epoch:train:7701-7800batch: iter_time=1.063e-04, forward_time=0.144, loss_ctc=67.643, loss_att=47.760, acc=0.736, loss=53.725, backward_time=1.028, grad_norm=130.713, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 23:55:28,212 (trainer:732) INFO: 46epoch:train:7801-7900batch: iter_time=1.197e-04, forward_time=0.145, loss_ctc=64.828, loss_att=52.504, acc=0.715, loss=56.201, backward_time=1.026, grad_norm=142.216, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 23:57:43,849 (trainer:732) INFO: 46epoch:train:7901-8000batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=67.229, loss_att=51.224, acc=0.726, loss=56.026, backward_time=1.027, grad_norm=115.103, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 23:59:59,805 (trainer:732) INFO: 46epoch:train:8001-8100batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=73.639, loss_att=56.195, acc=0.717, loss=61.428, backward_time=1.028, grad_norm=125.297, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.282e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 00:02:20,477 
(trainer:732) INFO: 46epoch:train:8101-8200batch: iter_time=1.146e-04, forward_time=0.166, loss_ctc=62.564, loss_att=44.204, acc=0.720, loss=49.712, backward_time=1.037, grad_norm=108.944, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.282e-05, train_time=2.813 +[gpub002:0/64] 2023-07-14 00:04:43,201 (trainer:732) INFO: 46epoch:train:8201-8300batch: iter_time=1.046e-04, forward_time=0.150, loss_ctc=71.384, loss_att=50.456, acc=0.737, loss=56.734, backward_time=1.035, grad_norm=124.346, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.281e-05, train_time=2.854 +[gpub002:0/64] 2023-07-14 00:05:53,094 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-14 00:06:11,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 00:06:14,927 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 00:13:47,812 (trainer:732) INFO: 46epoch:train:8301-8400batch: iter_time=3.876, forward_time=0.196, loss_ctc=69.752, loss_att=55.635, acc=0.712, loss=59.870, backward_time=1.068, grad_norm=118.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.280e-05, train_time=10.891 +[gpub002:0/64] 2023-07-14 00:16:05,245 (trainer:732) INFO: 46epoch:train:8401-8500batch: iter_time=1.318e-04, forward_time=0.145, loss_ctc=67.335, loss_att=48.827, acc=0.725, loss=54.379, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.280e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 00:18:21,297 (trainer:732) INFO: 46epoch:train:8501-8600batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=67.132, loss_att=46.641, acc=0.733, loss=52.788, backward_time=1.028, grad_norm=107.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 00:20:42,957 (trainer:732) INFO: 46epoch:train:8601-8700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=66.963, loss_att=52.861, acc=0.720, loss=57.092, backward_time=1.048, grad_norm=130.169, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.833 +[gpub002:0/64] 2023-07-14 00:23:00,099 (trainer:732) INFO: 46epoch:train:8701-8800batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=70.472, loss_att=52.762, acc=0.723, loss=58.075, backward_time=1.032, grad_norm=139.851, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.278e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 00:25:20,177 (trainer:732) INFO: 46epoch:train:8801-8900batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=71.545, loss_att=54.284, acc=0.718, loss=59.463, backward_time=1.034, grad_norm=122.838, 
clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.801 +[gpub002:0/64] 2023-07-14 00:27:37,496 (trainer:732) INFO: 46epoch:train:8901-9000batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=62.966, loss_att=43.277, acc=0.727, loss=49.184, backward_time=1.032, grad_norm=110.704, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.746 +[gpub002:0/64] 2023-07-14 00:29:53,320 (trainer:732) INFO: 46epoch:train:9001-9100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=71.349, loss_att=50.693, acc=0.732, loss=56.890, backward_time=1.028, grad_norm=112.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.276e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 00:31:47,729 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-14 00:32:06,387 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 00:32:09,852 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 00:38:23,995 (trainer:732) INFO: 46epoch:train:9101-9200batch: iter_time=3.641, forward_time=0.187, loss_ctc=72.415, loss_att=55.674, acc=0.717, loss=60.696, backward_time=1.057, grad_norm=127.477, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.276e-05, train_time=10.213 +[gpub002:0/64] 2023-07-14 00:40:40,945 (trainer:732) INFO: 46epoch:train:9201-9300batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=67.604, loss_att=57.124, acc=0.707, loss=60.268, backward_time=1.028, grad_norm=127.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.275e-05, train_time=2.739 +[gpub002:0/64] 2023-07-14 00:42:57,963 (trainer:732) INFO: 46epoch:train:9301-9400batch: iter_time=1.030e-04, forward_time=0.143, loss_ctc=66.251, loss_att=46.442, acc=0.724, loss=52.385, backward_time=1.027, grad_norm=112.768, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.740 +[gpub002:0/64] 2023-07-14 00:45:13,954 (trainer:732) INFO: 46epoch:train:9401-9500batch: iter_time=1.018e-04, forward_time=0.144, loss_ctc=67.687, loss_att=51.450, acc=0.727, loss=56.321, backward_time=1.026, grad_norm=130.442, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 00:47:30,437 (trainer:732) INFO: 46epoch:train:9501-9600batch: iter_time=1.065e-04, forward_time=0.147, loss_ctc=66.976, loss_att=48.132, acc=0.723, loss=53.785, backward_time=1.028, grad_norm=111.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.273e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 00:49:46,162 (trainer:732) INFO: 46epoch:train:9601-9700batch: 
iter_time=1.500e-04, forward_time=0.146, loss_ctc=73.129, loss_att=56.047, acc=0.713, loss=61.172, backward_time=1.028, grad_norm=121.197, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.273e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 00:52:01,715 (trainer:732) INFO: 46epoch:train:9701-9800batch: iter_time=1.470e-04, forward_time=0.146, loss_ctc=68.415, loss_att=54.013, acc=0.702, loss=58.333, backward_time=1.027, grad_norm=120.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 00:54:17,220 (trainer:732) INFO: 46epoch:train:9801-9900batch: iter_time=1.497e-04, forward_time=0.146, loss_ctc=63.092, loss_att=45.005, acc=0.726, loss=50.431, backward_time=1.027, grad_norm=115.827, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 00:56:33,163 (trainer:732) INFO: 46epoch:train:9901-10000batch: iter_time=1.520e-04, forward_time=0.147, loss_ctc=71.355, loss_att=49.992, acc=0.729, loss=56.401, backward_time=1.029, grad_norm=110.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.271e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 01:10:58,653 (trainer:338) INFO: 46epoch results: [train] iter_time=0.292, forward_time=0.151, loss_ctc=68.954, loss_att=51.397, acc=0.719, loss=56.664, backward_time=1.032, grad_norm=125.536, clip=100.000, loss_scale=4.322e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=3.565, time=4 hours, 57 minutes and 19.06 seconds, total_count=430000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.169, cer_ctc=0.254, loss_att=38.108, acc=0.681, cer=0.392, wer=0.992, loss=39.627, time=8 minutes and 21.07 seconds, total_count=44022, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 49.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-14 01:11:14,472 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/41epoch.pth +[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:272) INFO: 47/50epoch started. Estimated time to finish: 20 hours, 29 minutes and 24.2 seconds +[gpub002:0/64] 2023-07-14 01:11:14,540 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
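The "Estimated time to finish" in the record above can be sanity-checked with back-of-the-envelope arithmetic from the "46epoch results" line: one epoch costs roughly the [train] + [valid] + [att_plot] wall time, and four epochs remain. A hedged sketch (the trainer's own estimate presumably uses its running average over earlier epochs, hence the small gap):

```python
from datetime import timedelta

# Times copied from the "46epoch results" line above (rounded to seconds).
epoch = (timedelta(hours=4, minutes=57, seconds=19)   # [train]
         + timedelta(minutes=8, seconds=21)           # [valid]
         + timedelta(minutes=5, seconds=50))          # [att_plot]
remaining_epochs = 50 - 46
print(remaining_epochs * epoch)  # 20:46:00 -- close to the logged ~20h29m
```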
+[gpub002:0/64] 2023-07-14 01:11:32,416 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 01:11:35,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 01:15:48,585 (trainer:732) INFO: 47epoch:train:1-100batch: iter_time=1.312, forward_time=0.182, loss_ctc=75.825, loss_att=61.109, acc=0.689, loss=65.524, backward_time=1.041, grad_norm=158.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.270e-05, train_time=5.481 +[gpub002:0/64] 2023-07-14 01:18:27,141 (trainer:732) INFO: 47epoch:train:101-200batch: iter_time=8.032e-04, forward_time=0.180, loss_ctc=64.299, loss_att=48.764, acc=0.701, loss=53.424, backward_time=1.052, grad_norm=127.674, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.270e-05, train_time=3.170 +[gpub002:0/64] 2023-07-14 01:20:57,571 (trainer:732) INFO: 47epoch:train:201-300batch: iter_time=1.242e-04, forward_time=0.156, loss_ctc=68.624, loss_att=50.174, acc=0.700, loss=55.709, backward_time=1.042, grad_norm=132.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.269e-05, train_time=3.010 +[gpub002:0/64] 2023-07-14 01:23:21,420 (trainer:732) INFO: 47epoch:train:301-400batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=73.786, loss_att=55.907, acc=0.690, loss=61.271, backward_time=1.039, grad_norm=144.858, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.269e-05, train_time=2.876 +[gpub002:0/64] 2023-07-14 01:25:56,177 (trainer:732) INFO: 47epoch:train:401-500batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=65.105, loss_att=48.139, acc=0.706, loss=53.229, backward_time=1.057, grad_norm=144.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.268e-05, train_time=3.096 +[gpub002:0/64] 2023-07-14 01:28:14,232 (trainer:732) INFO: 47epoch:train:501-600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=71.910, loss_att=51.661, acc=0.724, loss=57.735, backward_time=1.032, grad_norm=145.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.761 +[gpub002:0/64] 2023-07-14 01:30:34,563 (trainer:732) INFO: 47epoch:train:601-700batch: iter_time=1.124e-04, forward_time=0.143, loss_ctc=66.419, loss_att=46.645, acc=0.717, loss=52.577, backward_time=1.042, grad_norm=116.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.806 +[gpub002:0/64] 2023-07-14 01:32:55,156 (trainer:732) INFO: 47epoch:train:701-800batch: iter_time=1.111e-04, forward_time=0.144, loss_ctc=75.585, loss_att=57.869, acc=0.701, loss=63.184, backward_time=1.031, grad_norm=132.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.266e-05, 
train_time=2.812 +[gpub002:0/64] 2023-07-14 01:33:50,705 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-14 01:34:08,334 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 01:34:11,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 01:40:02,226 (trainer:732) INFO: 47epoch:train:801-900batch: iter_time=2.580, forward_time=0.174, loss_ctc=75.755, loss_att=60.417, acc=0.703, loss=65.018, backward_time=1.045, grad_norm=133.637, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.266e-05, train_time=8.541 +[gpub002:0/64] 2023-07-14 01:42:20,381 (trainer:732) INFO: 47epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=69.051, loss_att=52.908, acc=0.716, loss=57.751, backward_time=1.030, grad_norm=128.359, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.265e-05, train_time=2.763 +[gpub002:0/64] 2023-07-14 01:44:36,647 (trainer:732) INFO: 47epoch:train:1001-1100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=67.260, loss_att=48.787, acc=0.713, loss=54.329, backward_time=1.033, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.725 +[gpub002:0/64] 2023-07-14 01:46:52,658 (trainer:732) INFO: 47epoch:train:1101-1200batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=69.474, loss_att=52.446, acc=0.702, loss=57.555, backward_time=1.027, grad_norm=149.543, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 01:49:08,730 (trainer:732) INFO: 47epoch:train:1201-1300batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=63.276, loss_att=48.489, acc=0.718, loss=52.925, backward_time=1.028, grad_norm=139.425, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 01:51:24,328 (trainer:732) INFO: 47epoch:train:1301-1400batch: iter_time=1.570e-04, forward_time=0.145, loss_ctc=72.456, loss_att=49.418, acc=0.727, loss=56.329, backward_time=1.028, grad_norm=130.808, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 01:53:40,003 (trainer:732) INFO: 47epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=63.452, loss_att=48.009, acc=0.722, loss=52.642, backward_time=1.027, grad_norm=121.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 01:55:56,022 (trainer:732) INFO: 47epoch:train:1501-1600batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=78.127, loss_att=57.946, acc=0.718, loss=64.000, 
backward_time=1.030, grad_norm=110.143, clip=100.000, loss_scale=5.906e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 01:57:27,328 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-14 01:57:45,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-14 01:57:48,769 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 02:02:30,932 (trainer:732) INFO: 47epoch:train:1601-1700batch: iter_time=1.331, forward_time=0.182, loss_ctc=77.305, loss_att=58.961, acc=0.710, loss=64.464, backward_time=1.043, grad_norm=114.992, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.261e-05, train_time=7.898 +[gpub002:0/64] 2023-07-14 02:04:47,565 (trainer:732) INFO: 47epoch:train:1701-1800batch: iter_time=1.311e-04, forward_time=0.146, loss_ctc=68.654, loss_att=53.205, acc=0.712, loss=57.840, backward_time=1.031, grad_norm=151.268, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.260e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 02:07:03,513 (trainer:732) INFO: 47epoch:train:1801-1900batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=65.765, loss_att=46.697, acc=0.724, loss=52.417, backward_time=1.029, grad_norm=120.537, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.260e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 02:09:20,232 (trainer:732) INFO: 47epoch:train:1901-2000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=67.723, loss_att=50.397, acc=0.716, loss=55.595, backward_time=1.030, grad_norm=131.389, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.734 +[gpub002:0/64] 2023-07-14 02:11:36,110 (trainer:732) INFO: 47epoch:train:2001-2100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=69.804, loss_att=52.306, acc=0.713, loss=57.555, backward_time=1.028, grad_norm=112.445, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 02:13:51,747 (trainer:732) INFO: 47epoch:train:2101-2200batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=69.056, loss_att=49.240, acc=0.712, loss=55.185, backward_time=1.025, grad_norm=113.889, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 02:16:07,448 (trainer:732) INFO: 47epoch:train:2201-2300batch: iter_time=1.067e-04, forward_time=0.144, loss_ctc=67.405, loss_att=49.335, acc=0.732, loss=54.756, backward_time=1.026, grad_norm=125.340, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 02:18:23,460 (trainer:732) INFO: 
47epoch:train:2301-2400batch: iter_time=1.052e-04, forward_time=0.145, loss_ctc=71.576, loss_att=54.166, acc=0.723, loss=59.389, backward_time=1.028, grad_norm=130.633, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.257e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 02:20:48,284 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-14 02:21:06,460 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 02:21:09,867 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 02:21:09,868 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-14 02:21:09,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 02:27:11,938 (trainer:732) INFO: 47epoch:train:2401-2500batch: iter_time=1.320, forward_time=0.168, loss_ctc=72.393, loss_att=54.066, acc=0.713, loss=59.565, backward_time=1.066, grad_norm=115.635, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=10.569 +[gpub002:0/64] 2023-07-14 02:29:29,559 (trainer:732) INFO: 47epoch:train:2501-2600batch: iter_time=1.496e-04, forward_time=0.146, loss_ctc=73.743, loss_att=59.618, acc=0.696, loss=63.855, backward_time=1.035, grad_norm=134.162, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=2.752 +[gpub002:0/64] 2023-07-14 02:31:45,140 (trainer:732) INFO: 47epoch:train:2601-2700batch: iter_time=1.647e-04, forward_time=0.145, loss_ctc=63.864, loss_att=48.114, acc=0.703, loss=52.839, backward_time=1.028, grad_norm=120.583, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 02:34:00,714 (trainer:732) INFO: 47epoch:train:2701-2800batch: iter_time=1.460e-04, forward_time=0.146, loss_ctc=68.746, loss_att=49.931, acc=0.705, loss=55.575, backward_time=1.027, grad_norm=150.817, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 02:36:18,845 (trainer:732) INFO: 47epoch:train:2801-2900batch: iter_time=1.734e-04, forward_time=0.168, loss_ctc=70.481, loss_att=52.577, acc=0.699, loss=57.948, backward_time=1.029, grad_norm=134.419, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.254e-05, train_time=2.762 +[gpub002:0/64] 2023-07-14 02:38:42,459 (trainer:732) INFO: 47epoch:train:2901-3000batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=64.475, loss_att=47.169, acc=0.714, loss=52.361, backward_time=1.057, grad_norm=124.697, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.253e-05, train_time=2.872 +[gpub002:0/64] 2023-07-14 02:41:05,117 (trainer:732) INFO: 47epoch:train:3001-3100batch: iter_time=3.092e-04, forward_time=0.157, loss_ctc=72.537, loss_att=51.195, acc=0.726, loss=57.597, backward_time=1.039, grad_norm=137.411, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.253e-05, train_time=2.853 +[gpub002:0/64] 2023-07-14 02:43:46,956 (trainer:732) INFO: 47epoch:train:3101-3200batch: iter_time=0.002, forward_time=0.194, loss_ctc=64.519, loss_att=46.475, acc=0.719, loss=51.888, backward_time=1.090, grad_norm=114.179, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.200, optim0_lr0=5.252e-05, train_time=3.236 +[gpub002:0/64] 2023-07-14 02:44:01,707 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-14 02:46:11,080 (trainer:732) INFO: 47epoch:train:3201-3300batch: iter_time=1.368e-04, forward_time=0.146, loss_ctc=75.702, loss_att=56.674, acc=0.706, loss=62.383, backward_time=1.046, grad_norm=152.557, clip=100.000, loss_scale=3.510e+32, optim_step_time=0.183, optim0_lr0=5.252e-05, train_time=2.883 +[gpub002:0/64] 2023-07-14 02:47:14,725 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 02:47:32,956 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-14 02:47:36,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 02:54:55,647 (trainer:732) INFO: 47epoch:train:3301-3400batch: iter_time=3.444, forward_time=0.146, loss_ctc=70.929, loss_att=56.270, acc=0.703, loss=60.668, backward_time=1.052, grad_norm=114.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=10.491 +[gpub002:0/64] 2023-07-14 02:57:12,121 (trainer:732) INFO: 47epoch:train:3401-3500batch: iter_time=1.342e-04, forward_time=0.145, loss_ctc=63.963, loss_att=47.556, acc=0.723, loss=52.478, backward_time=1.031, grad_norm=101.174, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=2.729 +[gpub002:0/64] 2023-07-14 02:59:28,257 (trainer:732) INFO: 47epoch:train:3501-3600batch: iter_time=1.333e-04, forward_time=0.147, loss_ctc=66.806, loss_att=50.043, acc=0.710, loss=55.072, backward_time=1.029, grad_norm=138.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.250e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 03:01:48,809 (trainer:732) INFO: 47epoch:train:3601-3700batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=71.928, loss_att=54.464, acc=0.712, loss=59.703, backward_time=1.036, grad_norm=200.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.811 +[gpub002:0/64] 2023-07-14 03:04:07,361 (trainer:732) INFO: 47epoch:train:3701-3800batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=67.152, loss_att=46.952, acc=0.720, loss=53.012, backward_time=1.030, grad_norm=148.586, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.771 
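Each iter-factory build above logs the same sampler summary: `UnsortedBatchSampler(N-batch=37994, batch_size=128, ...)` with `mean=128.0, min=128, max=129`. A hypothetical reconstruction of how a constant-size sampler over the shape file produces a few 129-utterance batches (a sketch under that assumption, not ESPnet's actual implementation):

```python
def constant_size_batches(keys, batch_size=128):
    """Group keys from the shape file, in file order, into
    N = len(keys) // batch_size batches, folding the remainder into the
    first batches -- giving min=128, max=129 and a mean that still
    rounds to 128.0, as in the summaries logged above."""
    n_batch = len(keys) // batch_size
    sizes = [batch_size] * n_batch
    for i in range(len(keys) - n_batch * batch_size):
        sizes[i % n_batch] += 1  # leftovers become batches of 129
    batches, start = [], 0
    for size in sizes:
        batches.append(keys[start:start + size])
        start += size
    return batches
```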
+[gpub002:0/64] 2023-07-14 03:06:28,698 (trainer:732) INFO: 47epoch:train:3801-3900batch: iter_time=1.379e-04, forward_time=0.145, loss_ctc=65.566, loss_att=47.542, acc=0.728, loss=52.949, backward_time=1.052, grad_norm=109.596, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.827 +[gpub002:0/64] 2023-07-14 03:08:45,304 (trainer:732) INFO: 47epoch:train:3901-4000batch: iter_time=1.467e-04, forward_time=0.145, loss_ctc=70.262, loss_att=51.105, acc=0.728, loss=56.852, backward_time=1.027, grad_norm=130.886, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 03:11:01,331 (trainer:732) INFO: 47epoch:train:4001-4100batch: iter_time=1.377e-04, forward_time=0.146, loss_ctc=68.748, loss_att=49.916, acc=0.725, loss=55.566, backward_time=1.029, grad_norm=137.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.247e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 03:12:38,470 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-14 03:12:56,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-14 03:13:00,247 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 03:19:05,776 (trainer:732) INFO: 47epoch:train:4101-4200batch: iter_time=1.352, forward_time=0.208, loss_ctc=74.165, loss_att=58.149, acc=0.700, loss=62.954, backward_time=1.172, grad_norm=121.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.247e-05, train_time=9.688 +[gpub002:0/64] 2023-07-14 03:21:21,928 (trainer:732) INFO: 47epoch:train:4201-4300batch: iter_time=1.398e-04, forward_time=0.145, loss_ctc=65.068, loss_att=49.135, acc=0.702, loss=53.915, backward_time=1.028, grad_norm=131.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.246e-05, train_time=2.723 +[gpub002:0/64] 2023-07-14 03:23:38,012 (trainer:732) INFO: 47epoch:train:4301-4400batch: iter_time=1.409e-04, forward_time=0.144, loss_ctc=67.892, loss_att=51.883, acc=0.698, loss=56.686, backward_time=1.029, grad_norm=124.443, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 03:25:53,885 (trainer:732) INFO: 47epoch:train:4401-4500batch: iter_time=1.313e-04, forward_time=0.144, loss_ctc=68.165, loss_att=48.760, acc=0.707, loss=54.582, backward_time=1.028, grad_norm=148.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 03:28:09,352 (trainer:732) INFO: 47epoch:train:4501-4600batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=62.490, loss_att=47.304, acc=0.713, loss=51.860, 
backward_time=1.026, grad_norm=114.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.709 +[gpub002:0/64] 2023-07-14 03:30:25,107 (trainer:732) INFO: 47epoch:train:4601-4700batch: iter_time=1.325e-04, forward_time=0.144, loss_ctc=73.055, loss_att=52.955, acc=0.722, loss=58.985, backward_time=1.027, grad_norm=135.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 03:32:40,456 (trainer:732) INFO: 47epoch:train:4701-4800batch: iter_time=1.270e-04, forward_time=0.143, loss_ctc=64.160, loss_att=45.152, acc=0.720, loss=50.854, backward_time=1.025, grad_norm=152.783, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.243e-05, train_time=2.707 +[gpub002:0/64] 2023-07-14 03:34:55,883 (trainer:732) INFO: 47epoch:train:4801-4900batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=75.470, loss_att=57.125, acc=0.710, loss=62.628, backward_time=1.026, grad_norm=128.779, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 03:37:11,333 (trainer:732) INFO: 47epoch:train:4901-5000batch: iter_time=1.184e-04, forward_time=0.145, loss_ctc=68.637, loss_att=53.134, acc=0.699, loss=57.785, backward_time=1.027, grad_norm=135.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.709 +[gpub002:0/64] 2023-07-14 03:37:13,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-14 03:37:32,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-14 03:37:35,797 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 03:45:17,483 (trainer:732) INFO: 47epoch:train:5001-5100batch: iter_time=1.349, forward_time=0.238, loss_ctc=74.372, loss_att=58.913, acc=0.696, loss=63.550, backward_time=1.050, grad_norm=124.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.241e-05, train_time=9.723 +[gpub002:0/64] 2023-07-14 03:47:33,846 (trainer:732) INFO: 47epoch:train:5101-5200batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=63.535, loss_att=47.253, acc=0.709, loss=52.138, backward_time=1.028, grad_norm=146.127, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.241e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 03:49:56,309 (trainer:732) INFO: 47epoch:train:5201-5300batch: iter_time=1.404e-04, forward_time=0.145, loss_ctc=67.170, loss_att=48.352, acc=0.708, loss=53.997, backward_time=1.032, grad_norm=104.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.849 +[gpub002:0/64] 2023-07-14 03:52:23,539 (trainer:732) INFO: 
47epoch:train:5301-5400batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=70.328, loss_att=52.677, acc=0.704, loss=57.973, backward_time=1.041, grad_norm=125.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.944 +[gpub002:0/64] 2023-07-14 03:54:39,620 (trainer:732) INFO: 47epoch:train:5401-5500batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=63.753, loss_att=46.245, acc=0.719, loss=51.498, backward_time=1.030, grad_norm=101.661, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.239e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 03:56:55,460 (trainer:732) INFO: 47epoch:train:5501-5600batch: iter_time=1.463e-04, forward_time=0.145, loss_ctc=70.240, loss_att=51.409, acc=0.725, loss=57.058, backward_time=1.029, grad_norm=153.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.238e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 03:59:20,845 (trainer:732) INFO: 47epoch:train:5601-5700batch: iter_time=7.024e-04, forward_time=0.186, loss_ctc=63.760, loss_att=44.689, acc=0.728, loss=50.410, backward_time=1.053, grad_norm=104.609, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.238e-05, train_time=2.907 +[gpub002:0/64] 2023-07-14 04:01:56,667 (trainer:732) INFO: 47epoch:train:5701-5800batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=75.062, loss_att=56.213, acc=0.712, loss=61.868, backward_time=1.096, grad_norm=113.891, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.237e-05, train_time=3.117 +[gpub002:0/64] 2023-07-14 04:03:00,524 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-14 04:03:18,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 04:03:22,119 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 04:03:22,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-14 04:03:22,126 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 04:08:19,987 (trainer:732) INFO: 47epoch:train:5801-5900batch: iter_time=2.282, forward_time=0.213, loss_ctc=75.092, loss_att=60.659, acc=0.705, loss=64.989, backward_time=1.059, grad_norm=119.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.237e-05, train_time=7.666 +[gpub002:0/64] 2023-07-14 04:10:36,391 (trainer:732) INFO: 47epoch:train:5901-6000batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=66.678, loss_att=50.374, acc=0.726, loss=55.266, backward_time=1.030, grad_norm=136.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 04:12:52,162 (trainer:732) INFO: 47epoch:train:6001-6100batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=65.131, loss_att=47.955, acc=0.721, loss=53.108, backward_time=1.026, grad_norm=129.089, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 04:15:07,834 (trainer:732) INFO: 47epoch:train:6101-6200batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=67.860, loss_att=50.576, acc=0.714, loss=55.761, backward_time=1.026, grad_norm=109.883, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.235e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 04:17:23,387 (trainer:732) INFO: 47epoch:train:6201-6300batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=61.925, loss_att=47.562, acc=0.718, loss=51.871, backward_time=1.027, grad_norm=111.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.234e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 04:19:46,670 (trainer:732) INFO: 47epoch:train:6301-6400batch: iter_time=0.005, forward_time=0.187, loss_ctc=70.970, loss_att=49.802, acc=0.728, loss=56.152, backward_time=1.043, grad_norm=134.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.234e-05, train_time=2.865 +[gpub002:0/64] 2023-07-14 04:22:02,533 (trainer:732) INFO: 47epoch:train:6401-6500batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=63.773, loss_att=48.033, acc=0.726, loss=52.755, backward_time=1.025, grad_norm=110.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 04:24:18,474 (trainer:732) INFO: 47epoch:train:6501-6600batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=76.393, loss_att=57.916, acc=0.722, loss=63.459, backward_time=1.028, grad_norm=126.533, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 04:25:50,082 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-14 04:26:08,645 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 04:26:12,059 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 04:26:12,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 04:26:12,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 04:30:38,669 (trainer:732) INFO: 47epoch:train:6601-6700batch: iter_time=1.313, forward_time=0.145, loss_ctc=74.407, loss_att=56.874, acc=0.714, loss=62.134, backward_time=1.044, grad_norm=148.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=7.604 +[gpub002:0/64] 2023-07-14 04:32:55,250 (trainer:732) INFO: 47epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=66.947, loss_att=51.659, acc=0.708, loss=56.246, backward_time=1.029, grad_norm=104.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 04:35:11,755 (trainer:732) INFO: 47epoch:train:6801-6900batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=64.313, loss_att=47.002, acc=0.716, loss=52.196, backward_time=1.028, grad_norm=98.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.231e-05, train_time=2.730 +[gpub002:0/64] 2023-07-14 04:37:28,932 (trainer:732) INFO: 47epoch:train:6901-7000batch: iter_time=3.968e-04, forward_time=0.152, loss_ctc=66.847, loss_att=51.004, acc=0.707, loss=55.757, backward_time=1.032, grad_norm=99.645, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 04:39:53,468 (trainer:732) INFO: 47epoch:train:7001-7100batch: iter_time=1.192e-04, forward_time=0.143, loss_ctc=67.687, loss_att=49.843, acc=0.716, loss=55.196, backward_time=1.040, grad_norm=119.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.891 +[gpub002:0/64] 2023-07-14 04:42:25,260 (trainer:732) INFO: 47epoch:train:7101-7200batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=68.017, loss_att=47.851, acc=0.713, loss=53.900, backward_time=1.067, grad_norm=115.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=3.036 +[gpub002:0/64] 2023-07-14 04:44:41,100 (trainer:732) INFO: 47epoch:train:7201-7300batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=66.541, loss_att=47.903, acc=0.728, loss=53.495, backward_time=1.029, grad_norm=118.808, clip=100.000, loss_scale=6.166e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 04:46:57,499 (trainer:732) INFO: 47epoch:train:7301-7400batch: iter_time=1.337e-04, forward_time=0.146, loss_ctc=71.183, loss_att=51.940, acc=0.725, loss=57.713, backward_time=1.029, grad_norm=115.971, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, 
optim0_lr0=5.228e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 04:49:48,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-14 04:50:07,256 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-14 04:50:10,710 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 04:54:02,831 (trainer:732) INFO: 47epoch:train:7401-7500batch: iter_time=2.585, forward_time=0.260, loss_ctc=71.936, loss_att=53.052, acc=0.710, loss=58.717, backward_time=1.061, grad_norm=137.288, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.188, optim0_lr0=5.228e-05, train_time=8.505 +[gpub002:0/64] 2023-07-14 04:56:20,415 (trainer:732) INFO: 47epoch:train:7501-7600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=68.399, loss_att=53.056, acc=0.694, loss=57.658, backward_time=1.034, grad_norm=124.323, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.227e-05, train_time=2.753 +[gpub002:0/64] 2023-07-14 04:58:36,815 (trainer:732) INFO: 47epoch:train:7601-7700batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=67.099, loss_att=48.016, acc=0.713, loss=53.741, backward_time=1.027, grad_norm=111.682, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.226e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 05:00:53,216 (trainer:732) INFO: 47epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=67.867, loss_att=52.326, acc=0.704, loss=56.988, backward_time=1.029, grad_norm=134.364, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.226e-05, train_time=2.728 +[gpub002:0/64] 2023-07-14 05:01:20,178 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
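The warning above, read together with the loss_scale column, matches standard dynamic loss scaling for mixed-precision training: the scale grows over runs of finite steps (3.245e+32 doubling to 6.490e+32 in the preceding windows) and is backed off once a non-finite gradient appears, with that step skipped. A sketch of such an update loop using PyTorch's GradScaler; ESPnet's trainer differs in detail, and model, batch, and optimizer are placeholders:

import torch

scaler = torch.cuda.amp.GradScaler(init_scale=2.0**16)

def train_step(model, batch, optimizer, max_norm=100.0):
    with torch.cuda.amp.autocast():
        loss = model(**batch)["loss"]
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # bring gradients back to fp32 units
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if torch.isfinite(grad_norm):
        scaler.step(optimizer)  # on nan/inf the update is skipped instead,
                                # as in the warning above
    scaler.update()             # backoff (x0.5) after an overflow; periodic
                                # growth (x2) otherwise
    optimizer.zero_grad()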
+[gpub002:0/64] 2023-07-14 05:03:08,612 (trainer:732) INFO: 47epoch:train:7801-7900batch: iter_time=1.185e-04, forward_time=0.144, loss_ctc=63.118, loss_att=45.662, acc=0.720, loss=50.899, backward_time=1.027, grad_norm=101.710, clip=100.000, loss_scale=3.841e+32, optim_step_time=0.182, optim0_lr0=5.225e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 05:05:24,239 (trainer:732) INFO: 47epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.145, loss_ctc=70.809, loss_att=49.978, acc=0.711, loss=56.227, backward_time=1.028, grad_norm=119.098, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.225e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 05:07:39,760 (trainer:732) INFO: 47epoch:train:8001-8100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=65.678, loss_att=48.315, acc=0.720, loss=53.524, backward_time=1.028, grad_norm=121.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 05:09:55,413 (trainer:732) INFO: 47epoch:train:8101-8200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=72.102, loss_att=52.782, acc=0.728, loss=58.578, backward_time=1.027, grad_norm=124.500, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 05:12:11,383 (trainer:732) INFO: 47epoch:train:8201-8300batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=70.788, loss_att=52.774, acc=0.710, loss=58.178, backward_time=1.031, grad_norm=115.453, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.223e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 05:12:57,530 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-14 05:13:15,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 05:13:19,331 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 05:19:34,885 (trainer:732) INFO: 47epoch:train:8301-8400batch: iter_time=1.320, forward_time=0.157, loss_ctc=69.342, loss_att=56.350, acc=0.696, loss=60.247, backward_time=1.040, grad_norm=126.658, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.222e-05, train_time=8.870 +[gpub002:0/64] 2023-07-14 05:21:52,367 (trainer:732) INFO: 47epoch:train:8401-8500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.969, loss_att=47.931, acc=0.724, loss=52.742, backward_time=1.031, grad_norm=107.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.222e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 05:24:08,298 (trainer:732) INFO: 47epoch:train:8501-8600batch: iter_time=1.100e-04, forward_time=0.144, loss_ctc=65.267, loss_att=48.804, acc=0.713, loss=53.743, 
backward_time=1.027, grad_norm=121.119, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 05:26:24,844 (trainer:732) INFO: 47epoch:train:8601-8700batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=72.559, loss_att=55.089, acc=0.712, loss=60.330, backward_time=1.029, grad_norm=122.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 05:28:40,559 (trainer:732) INFO: 47epoch:train:8701-8800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=65.539, loss_att=47.000, acc=0.719, loss=52.562, backward_time=1.027, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 05:30:56,463 (trainer:732) INFO: 47epoch:train:8801-8900batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.154, loss_att=47.927, acc=0.733, loss=53.395, backward_time=1.029, grad_norm=129.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 05:33:12,008 (trainer:732) INFO: 47epoch:train:8901-9000batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=71.851, loss_att=52.837, acc=0.726, loss=58.541, backward_time=1.027, grad_norm=117.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.219e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 05:35:27,593 (trainer:732) INFO: 47epoch:train:9001-9100batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.806, loss_att=51.495, acc=0.721, loss=56.989, backward_time=1.027, grad_norm=118.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 05:37:02,534 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
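Each [train] summary in this log reports N-batch=37994 with mini-batch sizes between min=128 and max=129 and a mean of 128.0. That pattern is what falls out of cutting the key file into a fixed number of contiguous batches and spreading the remainder over the first batches instead of emitting a short tail. An illustrative sketch, not ESPnet's UnsortedBatchSampler itself, with n_batch taken as given:

def spread_batches(keys, n_batch):
    # Split keys (kept in key-file order, no sorting by length) into
    # n_batch contiguous batches whose sizes differ by at most one.
    base, extra = divmod(len(keys), n_batch)   # here: base=128, small extra
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)  # 128 or 129 -> mean ~128.0
        batches.append(keys[start:start + size])
        start += size
    return batches

With n_batch=37994 this reproduces the logged size summary for a split of roughly 4.86M utterance keys.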
+[gpub002:0/64] 2023-07-14 05:37:20,733 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-14 05:37:24,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 05:41:19,308 (trainer:732) INFO: 47epoch:train:9101-9200batch: iter_time=1.370, forward_time=0.154, loss_ctc=73.500, loss_att=59.056, acc=0.706, loss=63.389, backward_time=1.043, grad_norm=129.829, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=7.034 +[gpub002:0/64] 2023-07-14 05:43:35,917 (trainer:732) INFO: 47epoch:train:9201-9300batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=66.338, loss_att=51.966, acc=0.717, loss=56.278, backward_time=1.031, grad_norm=120.789, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 05:45:52,711 (trainer:732) INFO: 47epoch:train:9301-9400batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=64.912, loss_att=46.474, acc=0.726, loss=52.005, backward_time=1.029, grad_norm=133.594, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.736 +[gpub002:0/64] 2023-07-14 05:48:13,955 (trainer:732) INFO: 47epoch:train:9401-9500batch: iter_time=1.413e-04, forward_time=0.191, loss_ctc=67.262, loss_att=50.534, acc=0.717, loss=55.552, backward_time=1.033, grad_norm=131.021, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.216e-05, train_time=2.825 +[gpub002:0/64] 2023-07-14 05:50:43,138 (trainer:732) INFO: 47epoch:train:9501-9600batch: iter_time=0.001, forward_time=0.230, loss_ctc=68.713, loss_att=50.464, acc=0.725, loss=55.939, backward_time=1.041, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.200, optim0_lr0=5.216e-05, train_time=2.981 +[gpub002:0/64] 2023-07-14 05:53:04,340 (trainer:732) INFO: 47epoch:train:9601-9700batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=67.075, loss_att=47.370, acc=0.719, loss=53.282, backward_time=1.032, grad_norm=122.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.215e-05, train_time=2.826 +[gpub002:0/64] 2023-07-14 05:55:20,063 (trainer:732) INFO: 47epoch:train:9701-9800batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=67.510, loss_att=48.979, acc=0.734, loss=54.538, backward_time=1.028, grad_norm=124.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.214e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 05:57:35,588 (trainer:732) INFO: 47epoch:train:9801-9900batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=69.885, loss_att=51.819, acc=0.728, loss=57.239, backward_time=1.028, grad_norm=112.202, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.214e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 05:59:51,428 (trainer:732) INFO: 47epoch:train:9901-10000batch: iter_time=9.828e-05, forward_time=0.144, loss_ctc=71.596, loss_att=53.648, acc=0.720, loss=59.033, backward_time=1.029, grad_norm=136.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.213e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 06:14:05,194 (trainer:338) INFO: 47epoch results: [train] iter_time=0.216, forward_time=0.153, loss_ctc=68.897, loss_att=51.327, acc=0.714, loss=56.598, backward_time=1.037, grad_norm=126.595, clip=100.000, loss_scale=3.991e+32, optim_step_time=0.183, optim0_lr0=5.242e-05, train_time=3.463, time=4 hours, 48 minutes and 59.64 seconds, total_count=440000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.333, cer_ctc=0.254, loss_att=36.942, acc=0.674, cer=0.429, wer=0.998, loss=38.859, time=7 minutes and 55.45 seconds, total_count=45034, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 55.55 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-14 06:14:21,862 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-14 06:14:21,871 (trainer:272) INFO: 48/50epoch started. Estimated time to finish: 15 hours, 20 minutes and 28.01 seconds +[gpub002:0/64] 2023-07-14 06:14:22,214 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub002:0/64] 2023-07-14 06:14:41,401 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-14 06:14:44,832 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 06:25:13,027 (trainer:732) INFO: 48epoch:train:1-100batch: iter_time=5.066, forward_time=0.185, loss_ctc=72.566, loss_att=50.721, acc=0.708, loss=57.275, backward_time=1.043, grad_norm=138.002, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.198, optim0_lr0=5.213e-05, train_time=13.017 +[gpub002:0/64] 2023-07-14 06:27:29,352 (trainer:732) INFO: 48epoch:train:101-200batch: iter_time=1.335e-04, forward_time=0.145, loss_ctc=77.852, loss_att=63.076, acc=0.696, loss=67.509, backward_time=1.031, grad_norm=128.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.726 +[gpub002:0/64] 2023-07-14 06:29:52,948 (trainer:732) INFO: 48epoch:train:201-300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=74.900, loss_att=50.312, acc=0.726, loss=57.688, backward_time=1.027, grad_norm=135.033, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.872 +[gpub002:0/64] 2023-07-14 06:32:08,576 (trainer:732) INFO: 48epoch:train:301-400batch: iter_time=1.240e-04, forward_time=0.144, loss_ctc=76.914, loss_att=54.854, acc=0.698, loss=61.472, 
backward_time=1.026, grad_norm=140.589, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.211e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 06:34:24,747 (trainer:732) INFO: 48epoch:train:401-500batch: iter_time=1.252e-04, forward_time=0.143, loss_ctc=64.489, loss_att=50.286, acc=0.704, loss=54.546, backward_time=1.025, grad_norm=120.535, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.723 +[gpub002:0/64] 2023-07-14 06:36:44,732 (trainer:732) INFO: 48epoch:train:501-600batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=72.098, loss_att=53.893, acc=0.695, loss=59.355, backward_time=1.026, grad_norm=121.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.799 +[gpub002:0/64] 2023-07-14 06:39:10,733 (trainer:732) INFO: 48epoch:train:601-700batch: iter_time=0.004, forward_time=0.187, loss_ctc=68.179, loss_att=50.186, acc=0.717, loss=55.584, backward_time=1.038, grad_norm=119.134, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.209e-05, train_time=2.919 +[gpub002:0/64] 2023-07-14 06:41:44,082 (trainer:732) INFO: 48epoch:train:701-800batch: iter_time=1.232e-04, forward_time=0.239, loss_ctc=72.896, loss_att=50.389, acc=0.701, loss=57.141, backward_time=1.045, grad_norm=117.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.209e-05, train_time=3.068 +[gpub002:0/64] 2023-07-14 06:42:40,091 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-14 06:42:57,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-14 06:43:01,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 06:47:36,689 (trainer:732) INFO: 48epoch:train:801-900batch: iter_time=1.777, forward_time=0.193, loss_ctc=75.968, loss_att=53.040, acc=0.714, loss=59.919, backward_time=1.043, grad_norm=123.686, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.208e-05, train_time=7.051 +[gpub002:0/64] 2023-07-14 06:50:07,555 (trainer:732) INFO: 48epoch:train:901-1000batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=80.428, loss_att=59.359, acc=0.718, loss=65.679, backward_time=1.046, grad_norm=134.626, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.208e-05, train_time=3.018 +[gpub002:0/64] 2023-07-14 06:52:23,798 (trainer:732) INFO: 48epoch:train:1001-1100batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.892, loss_att=50.678, acc=0.724, loss=57.942, backward_time=1.031, grad_norm=128.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.725 +[gpub002:0/64] 2023-07-14 06:54:40,151 (trainer:732) INFO: 
48epoch:train:1101-1200batch: iter_time=1.110e-04, forward_time=0.143, loss_ctc=74.451, loss_att=54.112, acc=0.722, loss=60.214, backward_time=1.030, grad_norm=147.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 06:56:57,546 (trainer:732) INFO: 48epoch:train:1201-1300batch: iter_time=1.023e-04, forward_time=0.144, loss_ctc=63.785, loss_att=48.558, acc=0.713, loss=53.126, backward_time=1.031, grad_norm=111.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.206e-05, train_time=2.748 +[gpub002:0/64] 2023-07-14 06:59:13,500 (trainer:732) INFO: 48epoch:train:1301-1400batch: iter_time=1.072e-04, forward_time=0.144, loss_ctc=72.696, loss_att=53.103, acc=0.718, loss=58.981, backward_time=1.030, grad_norm=130.057, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 07:01:29,121 (trainer:732) INFO: 48epoch:train:1401-1500batch: iter_time=1.109e-04, forward_time=0.143, loss_ctc=68.627, loss_att=48.296, acc=0.723, loss=54.395, backward_time=1.027, grad_norm=116.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 07:03:53,791 (trainer:732) INFO: 48epoch:train:1501-1600batch: iter_time=8.219e-04, forward_time=0.204, loss_ctc=67.797, loss_att=49.570, acc=0.714, loss=55.038, backward_time=1.040, grad_norm=129.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.196, optim0_lr0=5.204e-05, train_time=2.893 +[gpub002:0/64] 2023-07-14 07:05:50,238 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-14 07:06:08,419 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-14 07:06:11,818 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:11:36,453 (trainer:732) INFO: 48epoch:train:1601-1700batch: iter_time=3.165, forward_time=0.201, loss_ctc=77.679, loss_att=56.373, acc=0.700, loss=62.765, backward_time=1.041, grad_norm=134.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.204e-05, train_time=9.252 +[gpub002:0/64] 2023-07-14 07:13:53,755 (trainer:732) INFO: 48epoch:train:1701-1800batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=67.651, loss_att=49.981, acc=0.716, loss=55.282, backward_time=1.033, grad_norm=139.985, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.747 +[gpub002:0/64] 2023-07-14 07:16:10,215 (trainer:732) INFO: 48epoch:train:1801-1900batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=81.909, loss_att=60.376, acc=0.710, loss=66.836, backward_time=1.031, grad_norm=150.019, clip=100.000, 
loss_scale=5.841e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.729 +[gpub002:0/64] 2023-07-14 07:18:26,045 (trainer:732) INFO: 48epoch:train:1901-2000batch: iter_time=9.725e-05, forward_time=0.145, loss_ctc=72.694, loss_att=50.879, acc=0.730, loss=57.424, backward_time=1.029, grad_norm=100.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.202e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 07:20:41,800 (trainer:732) INFO: 48epoch:train:2001-2100batch: iter_time=9.471e-05, forward_time=0.144, loss_ctc=75.595, loss_att=54.872, acc=0.713, loss=61.089, backward_time=1.028, grad_norm=135.140, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 07:22:57,479 (trainer:732) INFO: 48epoch:train:2101-2200batch: iter_time=8.940e-05, forward_time=0.144, loss_ctc=63.218, loss_att=46.176, acc=0.721, loss=51.288, backward_time=1.029, grad_norm=166.606, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 07:25:13,488 (trainer:732) INFO: 48epoch:train:2201-2300batch: iter_time=9.836e-05, forward_time=0.146, loss_ctc=70.710, loss_att=53.282, acc=0.717, loss=58.510, backward_time=1.031, grad_norm=133.467, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 07:27:29,318 (trainer:732) INFO: 48epoch:train:2301-2400batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=69.445, loss_att=49.286, acc=0.727, loss=55.334, backward_time=1.029, grad_norm=133.909, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 07:28:58,595 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-14 07:29:44,718 (trainer:732) INFO: 48epoch:train:2401-2500batch: iter_time=9.391e-05, forward_time=0.144, loss_ctc=69.277, loss_att=52.038, acc=0.706, loss=57.209, backward_time=1.028, grad_norm=129.891, clip=100.000, loss_scale=5.364e+32, optim_step_time=0.182, optim0_lr0=5.199e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 07:30:01,213 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
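The per-window trainer lines are regular enough to scrape into a table for plotting loss, grad_norm, or loss_scale over time. A hypothetical helper, not part of ESPnet, that pulls the numeric fields out of one record:

import re

PAT = re.compile(r"(?P<epoch>\d+)epoch:train:(?P<win>\d+-\d+)batch: (?P<kv>.*)")

def parse_record(line):
    m = PAT.search(line)
    if m is None:
        return None
    metrics = {"epoch": int(m.group("epoch")), "window": m.group("win")}
    for pair in m.group("kv").split(", "):
        if "=" in pair:
            key, val = pair.split("=", 1)
            try:
                metrics[key] = float(val)
            except ValueError:
                pass  # skip non-numeric tails
    return metrics

row = parse_record(
    "47epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.145, "
    "loss_ctc=70.809, loss_att=49.978, acc=0.711, loss=56.227"
)
assert row is not None and row["loss_att"] == 49.978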
+[gpub002:0/64] 2023-07-14 07:30:19,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 07:30:22,775 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:37:23,008 (trainer:732) INFO: 48epoch:train:2501-2600batch: iter_time=3.157, forward_time=0.175, loss_ctc=64.931, loss_att=49.988, acc=0.701, loss=54.471, backward_time=1.040, grad_norm=125.113, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.199e-05, train_time=9.166 +[gpub002:0/64] 2023-07-14 07:39:39,101 (trainer:732) INFO: 48epoch:train:2601-2700batch: iter_time=9.689e-05, forward_time=0.144, loss_ctc=77.176, loss_att=57.787, acc=0.708, loss=63.604, backward_time=1.027, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 07:41:55,336 (trainer:732) INFO: 48epoch:train:2701-2800batch: iter_time=1.280e-04, forward_time=0.147, loss_ctc=78.705, loss_att=57.307, acc=0.716, loss=63.727, backward_time=1.031, grad_norm=161.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 07:44:11,382 (trainer:732) INFO: 48epoch:train:2801-2900batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=71.666, loss_att=51.698, acc=0.722, loss=57.689, backward_time=1.029, grad_norm=143.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.197e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 07:46:27,152 (trainer:732) INFO: 48epoch:train:2901-3000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=67.826, loss_att=52.816, acc=0.696, loss=57.319, backward_time=1.028, grad_norm=112.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 07:48:42,648 (trainer:732) INFO: 48epoch:train:3001-3100batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=62.490, loss_att=46.664, acc=0.714, loss=51.412, backward_time=1.026, grad_norm=110.321, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 07:50:58,216 (trainer:732) INFO: 48epoch:train:3101-3200batch: iter_time=1.433e-04, forward_time=0.147, loss_ctc=71.138, loss_att=53.409, acc=0.703, loss=58.728, backward_time=1.027, grad_norm=124.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.195e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 07:53:13,640 (trainer:732) INFO: 48epoch:train:3201-3300batch: iter_time=1.522e-04, forward_time=0.145, loss_ctc=70.454, loss_att=51.309, acc=0.718, loss=57.052, backward_time=1.025, grad_norm=137.045, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.195e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 07:54:01,692 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 07:54:19,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-14 07:54:23,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:59:50,081 (trainer:732) INFO: 48epoch:train:3301-3400batch: iter_time=1.291, forward_time=0.146, loss_ctc=67.251, loss_att=52.714, acc=0.689, loss=57.075, backward_time=1.040, grad_norm=162.610, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=7.929 +[gpub002:0/64] 2023-07-14 08:02:06,659 (trainer:732) INFO: 48epoch:train:3401-3500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.470, loss_att=50.103, acc=0.717, loss=57.413, backward_time=1.030, grad_norm=146.633, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:04:22,707 (trainer:732) INFO: 48epoch:train:3501-3600batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=78.766, loss_att=59.983, acc=0.709, loss=65.618, backward_time=1.031, grad_norm=139.024, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.193e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 08:06:38,451 (trainer:732) INFO: 48epoch:train:3601-3700batch: iter_time=9.308e-05, forward_time=0.144, loss_ctc=69.199, loss_att=49.395, acc=0.729, loss=55.336, backward_time=1.029, grad_norm=127.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:08:54,104 (trainer:732) INFO: 48epoch:train:3701-3800batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=71.796, loss_att=51.842, acc=0.698, loss=57.828, backward_time=1.028, grad_norm=149.795, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:09:50,838 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
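The grad_norm reported in these records (and the value that goes nan in the warning above) is the norm computed at clipping time, and it routinely exceeds the configured clip=100.000. That is the expected behaviour when the norm is measured before clipping: torch.nn.utils.clip_grad_norm_ returns the pre-clip total norm and then rescales the gradients in place down to max_norm. A small self-contained demonstration of that semantics, assuming this is the clipping in use, which the clip column suggests:

import torch

p = torch.nn.Parameter(torch.ones(4))
p.grad = torch.full((4,), 60.0)        # total L2 norm = 120
norm = torch.nn.utils.clip_grad_norm_([p], max_norm=100.0)
print(float(norm))                     # 120.0 > 100, like grad_norm in the log
print(float(p.grad.norm()))            # ~100.0 after the in-place rescale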
+[gpub002:0/64] 2023-07-14 08:11:09,674 (trainer:732) INFO: 48epoch:train:3801-3900batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=66.375, loss_att=50.166, acc=0.713, loss=55.029, backward_time=1.028, grad_norm=168.422, clip=100.000, loss_scale=2.285e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 08:13:25,076 (trainer:732) INFO: 48epoch:train:3901-4000batch: iter_time=1.125e-04, forward_time=0.143, loss_ctc=72.317, loss_att=55.151, acc=0.699, loss=60.301, backward_time=1.027, grad_norm=131.187, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 08:15:40,836 (trainer:732) INFO: 48epoch:train:4001-4100batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=69.182, loss_att=49.859, acc=0.719, loss=55.656, backward_time=1.029, grad_norm=144.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.190e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:17:43,061 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-14 08:18:01,046 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 08:18:04,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:23:10,162 (trainer:732) INFO: 48epoch:train:4101-4200batch: iter_time=2.902, forward_time=0.247, loss_ctc=68.952, loss_att=51.746, acc=0.708, loss=56.908, backward_time=1.052, grad_norm=116.379, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.220, optim0_lr0=5.190e-05, train_time=8.986 +[gpub002:0/64] 2023-07-14 08:25:28,112 (trainer:732) INFO: 48epoch:train:4201-4300batch: iter_time=1.154e-04, forward_time=0.148, loss_ctc=75.515, loss_att=51.753, acc=0.724, loss=58.882, backward_time=1.034, grad_norm=126.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.759 +[gpub002:0/64] 2023-07-14 08:27:44,220 (trainer:732) INFO: 48epoch:train:4301-4400batch: iter_time=1.206e-04, forward_time=0.146, loss_ctc=74.155, loss_att=55.853, acc=0.722, loss=61.344, backward_time=1.031, grad_norm=127.353, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 08:30:00,444 (trainer:732) INFO: 48epoch:train:4401-4500batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=74.196, loss_att=52.771, acc=0.728, loss=59.198, backward_time=1.032, grad_norm=142.333, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.188e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 08:32:16,273 (trainer:732) INFO: 48epoch:train:4501-4600batch: iter_time=1.142e-04, forward_time=0.145, loss_ctc=69.251, loss_att=50.837, acc=0.715, loss=56.361, 
backward_time=1.029, grad_norm=131.558, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 08:34:31,949 (trainer:732) INFO: 48epoch:train:4601-4700batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=58.768, loss_att=43.134, acc=0.731, loss=47.825, backward_time=1.028, grad_norm=109.747, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:36:47,789 (trainer:732) INFO: 48epoch:train:4701-4800batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=71.818, loss_att=54.359, acc=0.714, loss=59.597, backward_time=1.029, grad_norm=139.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:39:03,632 (trainer:732) INFO: 48epoch:train:4801-4900batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=69.204, loss_att=51.952, acc=0.719, loss=57.128, backward_time=1.029, grad_norm=128.452, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:41:19,530 (trainer:732) INFO: 48epoch:train:4901-5000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=76.342, loss_att=54.027, acc=0.704, loss=60.721, backward_time=1.030, grad_norm=125.370, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 08:41:22,274 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-14 08:41:40,677 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-14 08:41:44,127 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:47:46,295 (trainer:732) INFO: 48epoch:train:5001-5100batch: iter_time=1.324, forward_time=0.145, loss_ctc=64.448, loss_att=47.570, acc=0.724, loss=52.633, backward_time=1.044, grad_norm=141.410, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=7.735 +[gpub002:0/64] 2023-07-14 08:50:02,881 (trainer:732) INFO: 48epoch:train:5101-5200batch: iter_time=8.791e-05, forward_time=0.143, loss_ctc=76.350, loss_att=54.968, acc=0.718, loss=61.383, backward_time=1.029, grad_norm=120.961, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.184e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:52:20,062 (trainer:732) INFO: 48epoch:train:5201-5300batch: iter_time=8.431e-05, forward_time=0.145, loss_ctc=78.698, loss_att=54.190, acc=0.731, loss=61.542, backward_time=1.034, grad_norm=145.742, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.184e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 08:54:36,566 (trainer:732) INFO: 
48epoch:train:5301-5400batch: iter_time=8.884e-05, forward_time=0.144, loss_ctc=71.014, loss_att=50.305, acc=0.733, loss=56.518, backward_time=1.031, grad_norm=149.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.183e-05, train_time=2.730 +[gpub002:0/64] 2023-07-14 08:56:54,010 (trainer:732) INFO: 48epoch:train:5401-5500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.905, loss_att=50.167, acc=0.717, loss=55.189, backward_time=1.029, grad_norm=107.271, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 08:59:13,151 (trainer:732) INFO: 48epoch:train:5501-5600batch: iter_time=1.282e-04, forward_time=0.145, loss_ctc=62.122, loss_att=44.641, acc=0.726, loss=49.885, backward_time=1.028, grad_norm=133.648, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.783 +[gpub002:0/64] 2023-07-14 09:01:33,741 (trainer:732) INFO: 48epoch:train:5601-5700batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.735, loss_att=53.019, acc=0.721, loss=58.034, backward_time=1.038, grad_norm=127.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.812 +[gpub002:0/64] 2023-07-14 09:03:52,868 (trainer:732) INFO: 48epoch:train:5701-5800batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=69.218, loss_att=50.409, acc=0.725, loss=56.052, backward_time=1.035, grad_norm=115.986, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.782 +[gpub002:0/64] 2023-07-14 09:04:41,018 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-14 09:04:59,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-14 09:05:03,197 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:11:40,827 (trainer:732) INFO: 48epoch:train:5801-5900batch: iter_time=3.228, forward_time=0.187, loss_ctc=64.451, loss_att=51.138, acc=0.698, loss=55.132, backward_time=1.041, grad_norm=126.975, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.180e-05, train_time=9.358 +[gpub002:0/64] 2023-07-14 09:13:57,967 (trainer:732) INFO: 48epoch:train:5901-6000batch: iter_time=1.175e-04, forward_time=0.144, loss_ctc=74.538, loss_att=51.602, acc=0.714, loss=58.483, backward_time=1.028, grad_norm=138.164, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.180e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 09:16:16,645 (trainer:732) INFO: 48epoch:train:6001-6100batch: iter_time=7.038e-04, forward_time=0.146, loss_ctc=78.354, loss_att=60.750, acc=0.709, loss=66.031, backward_time=1.033, grad_norm=141.686, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.179e-05, train_time=2.773 +[gpub002:0/64] 2023-07-14 09:18:33,621 (trainer:732) INFO: 48epoch:train:6101-6200batch: iter_time=1.237e-04, forward_time=0.146, loss_ctc=69.005, loss_att=49.078, acc=0.732, loss=55.056, backward_time=1.031, grad_norm=107.431, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.179e-05, train_time=2.739 +[gpub002:0/64] 2023-07-14 09:20:49,845 (trainer:732) INFO: 48epoch:train:6201-6300batch: iter_time=1.292e-04, forward_time=0.145, loss_ctc=71.735, loss_att=50.224, acc=0.706, loss=56.677, backward_time=1.031, grad_norm=145.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.178e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 09:23:24,114 (trainer:732) INFO: 48epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.283, loss_ctc=66.846, loss_att=50.323, acc=0.714, loss=55.280, backward_time=1.045, grad_norm=114.520, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=5.177e-05, train_time=3.085 +[gpub002:0/64] 2023-07-14 09:25:40,456 (trainer:732) INFO: 48epoch:train:6401-6500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.179, loss_att=54.681, acc=0.704, loss=59.930, backward_time=1.029, grad_norm=148.277, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.177e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 09:28:20,556 (trainer:732) INFO: 48epoch:train:6501-6600batch: iter_time=1.396e-04, forward_time=0.146, loss_ctc=67.464, loss_att=48.020, acc=0.724, loss=53.853, backward_time=1.113, grad_norm=125.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=3.202 +[gpub002:0/64] 2023-07-14 09:30:11,734 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
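Each "Building Nth iter-factory..." line marks a switch to another of the 12 pre-split shards (split.0 through split.11), visited in an order that changes per epoch; the iter_time spike (roughly 1-3 s versus ~1e-4 s) on the first 100-batch window after every rebuild is the fresh loader warming up. A rough sketch of that epoch structure, with build_loader as a placeholder for the factory that opens wav.scp/split.N and the matching text files:

import random

def run_epoch(build_loader, n_splits=12, epoch=48):
    order = list(range(n_splits))
    random.Random(epoch).shuffle(order)  # shard order varies per epoch
    for k, split in enumerate(order):
        # corresponds to "Building {k}th iter-factory..." in the log
        loader = build_loader(split)
        for batch in loader:             # the first windows absorb loader
            yield batch                  # start-up, hence the iter_time spikes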
+[gpub002:0/64] 2023-07-14 09:30:29,893 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 09:30:33,370 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:34:29,820 (trainer:732) INFO: 48epoch:train:6601-6700batch: iter_time=2.210, forward_time=0.145, loss_ctc=65.650, loss_att=48.209, acc=0.712, loss=53.441, backward_time=1.036, grad_norm=123.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=7.385 +[gpub002:0/64] 2023-07-14 09:36:46,637 (trainer:732) INFO: 48epoch:train:6701-6800batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=68.915, loss_att=49.606, acc=0.726, loss=55.399, backward_time=1.031, grad_norm=124.451, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.736 +[gpub002:0/64] 2023-07-14 09:39:03,225 (trainer:732) INFO: 48epoch:train:6801-6900batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=79.366, loss_att=60.971, acc=0.714, loss=66.490, backward_time=1.031, grad_norm=177.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 09:41:26,301 (trainer:732) INFO: 48epoch:train:6901-7000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=71.703, loss_att=47.655, acc=0.738, loss=54.869, backward_time=1.033, grad_norm=118.979, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.174e-05, train_time=2.861 +[gpub002:0/64] 2023-07-14 09:43:47,692 (trainer:732) INFO: 48epoch:train:7001-7100batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=74.135, loss_att=51.319, acc=0.722, loss=58.164, backward_time=1.037, grad_norm=130.583, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.174e-05, train_time=2.828 +[gpub002:0/64] 2023-07-14 09:46:22,408 (trainer:732) INFO: 48epoch:train:7101-7200batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=61.004, loss_att=45.940, acc=0.727, loss=50.459, backward_time=1.063, grad_norm=116.241, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.173e-05, train_time=3.094 +[gpub002:0/64] 2023-07-14 09:48:38,387 (trainer:732) INFO: 48epoch:train:7201-7300batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=71.858, loss_att=52.784, acc=0.722, loss=58.507, backward_time=1.030, grad_norm=147.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.172e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 09:51:09,299 (trainer:732) INFO: 48epoch:train:7301-7400batch: iter_time=1.101e-04, forward_time=0.147, loss_ctc=69.932, loss_att=48.151, acc=0.732, loss=54.685, backward_time=1.064, grad_norm=130.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, 
optim0_lr0=5.172e-05, train_time=3.018 +[gpub002:0/64] 2023-07-14 09:53:26,312 (trainer:732) INFO: 48epoch:train:7401-7500batch: iter_time=1.054e-04, forward_time=0.146, loss_ctc=67.229, loss_att=50.273, acc=0.714, loss=55.360, backward_time=1.031, grad_norm=147.697, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=2.740 +[gpub002:0/64] 2023-07-14 09:53:28,551 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-14 09:53:46,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 09:53:50,092 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:00:46,448 (trainer:732) INFO: 48epoch:train:7501-7600batch: iter_time=1.330, forward_time=0.146, loss_ctc=64.829, loss_att=48.046, acc=0.710, loss=53.081, backward_time=1.043, grad_norm=121.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=8.803 +[gpub002:0/64] 2023-07-14 10:03:03,225 (trainer:732) INFO: 48epoch:train:7601-7700batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=73.763, loss_att=54.433, acc=0.716, loss=60.232, backward_time=1.031, grad_norm=121.388, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.735 +[gpub002:0/64] 2023-07-14 10:05:19,335 (trainer:732) INFO: 48epoch:train:7701-7800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=77.979, loss_att=56.628, acc=0.718, loss=63.034, backward_time=1.030, grad_norm=145.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 10:07:35,239 (trainer:732) INFO: 48epoch:train:7801-7900batch: iter_time=1.459e-04, forward_time=0.144, loss_ctc=70.685, loss_att=50.240, acc=0.729, loss=56.374, backward_time=1.030, grad_norm=121.249, clip=100.000, loss_scale=2.564e+32, optim_step_time=0.182, optim0_lr0=5.169e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 10:10:05,010 (trainer:732) INFO: 48epoch:train:7901-8000batch: iter_time=1.328e-04, forward_time=0.239, loss_ctc=67.160, loss_att=50.782, acc=0.700, loss=55.695, backward_time=1.051, grad_norm=128.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.169e-05, train_time=2.995 +[gpub002:0/64] 2023-07-14 10:12:20,278 (trainer:732) INFO: 48epoch:train:8001-8100batch: iter_time=1.339e-04, forward_time=0.144, loss_ctc=63.723, loss_att=47.622, acc=0.712, loss=52.453, backward_time=1.025, grad_norm=112.150, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.168e-05, train_time=2.705 +[gpub002:0/64] 2023-07-14 10:14:36,085 (trainer:732) INFO: 48epoch:train:8101-8200batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.529, 
loss_att=54.062, acc=0.708, loss=60.202, backward_time=1.027, grad_norm=108.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 10:16:52,011 (trainer:732) INFO: 48epoch:train:8201-8300batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=68.909, loss_att=49.305, acc=0.729, loss=55.186, backward_time=1.028, grad_norm=144.843, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 10:17:38,915 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-14 10:17:57,252 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-14 10:18:00,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 10:22:44,622 (trainer:732) INFO: 48epoch:train:8301-8400batch: iter_time=2.087, forward_time=0.144, loss_ctc=66.298, loss_att=48.900, acc=0.708, loss=54.119, backward_time=1.041, grad_norm=129.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=7.052
+[gpub002:0/64] 2023-07-14 10:25:04,181 (trainer:732) INFO: 48epoch:train:8401-8500batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=79.695, loss_att=59.887, acc=0.715, loss=65.829, backward_time=1.030, grad_norm=136.298, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=2.791
+[gpub002:0/64] 2023-07-14 10:27:20,855 (trainer:732) INFO: 48epoch:train:8501-8600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=73.605, loss_att=52.399, acc=0.724, loss=58.761, backward_time=1.029, grad_norm=129.008, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.165e-05, train_time=2.733
+[gpub002:0/64] 2023-07-14 10:29:36,805 (trainer:732) INFO: 48epoch:train:8601-8700batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=73.931, loss_att=52.596, acc=0.720, loss=58.997, backward_time=1.027, grad_norm=123.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.165e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 10:31:52,421 (trainer:732) INFO: 48epoch:train:8701-8800batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=62.565, loss_att=48.800, acc=0.699, loss=52.930, backward_time=1.025, grad_norm=111.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.164e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 10:34:08,232 (trainer:732) INFO: 48epoch:train:8801-8900batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=71.599, loss_att=54.096, acc=0.705, loss=59.347, backward_time=1.027, grad_norm=114.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.164e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 10:36:23,781 (trainer:732) INFO: 48epoch:train:8901-9000batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=67.382, loss_att=47.060, acc=0.724, loss=53.156, backward_time=1.026, grad_norm=109.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 10:38:39,320 (trainer:732) INFO: 48epoch:train:9001-9100batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=65.844, loss_att=47.748, acc=0.721, loss=53.177, backward_time=1.027, grad_norm=117.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 10:40:20,761 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-14 10:40:38,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-14 10:40:42,516 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 10:44:51,880 (trainer:732) INFO: 48epoch:train:9101-9200batch: iter_time=2.166, forward_time=0.173, loss_ctc=76.442, loss_att=55.319, acc=0.703, loss=61.656, backward_time=1.038, grad_norm=122.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.162e-05, train_time=7.451
+[gpub002:0/64] 2023-07-14 10:47:08,550 (trainer:732) INFO: 48epoch:train:9201-9300batch: iter_time=1.199e-04, forward_time=0.144, loss_ctc=68.780, loss_att=49.932, acc=0.724, loss=55.586, backward_time=1.031, grad_norm=107.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.161e-05, train_time=2.733
+[gpub002:0/64] 2023-07-14 10:49:26,299 (trainer:732) INFO: 48epoch:train:9301-9400batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=79.867, loss_att=61.875, acc=0.714, loss=67.273, backward_time=1.031, grad_norm=156.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.161e-05, train_time=2.755
+[gpub002:0/64] 2023-07-14 10:51:42,933 (trainer:732) INFO: 48epoch:train:9401-9500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.470, loss_att=48.680, acc=0.737, loss=55.817, backward_time=1.029, grad_norm=122.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 10:53:58,937 (trainer:732) INFO: 48epoch:train:9501-9600batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=70.022, loss_att=50.699, acc=0.724, loss=56.496, backward_time=1.029, grad_norm=132.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 10:56:14,682 (trainer:732) INFO: 48epoch:train:9601-9700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=60.015, loss_att=45.859, acc=0.727, loss=50.106, backward_time=1.028, grad_norm=143.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 10:58:33,862 (trainer:732) INFO: 48epoch:train:9701-9800batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=70.164, loss_att=53.081, acc=0.720, loss=58.206, backward_time=1.036, grad_norm=114.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.783
+[gpub002:0/64] 2023-07-14 11:00:52,286 (trainer:732) INFO: 48epoch:train:9801-9900batch: iter_time=1.221e-04, forward_time=0.147, loss_ctc=67.592, loss_att=47.935, acc=0.733, loss=53.832, backward_time=1.034, grad_norm=113.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.768
+[gpub002:0/64] 2023-07-14 11:03:07,869 (trainer:732) INFO: 48epoch:train:9901-10000batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=66.172, loss_att=49.889, acc=0.716, loss=54.774, backward_time=1.028, grad_norm=112.218, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 11:17:21,394 (trainer:338) INFO: 48epoch results: [train] iter_time=0.297, forward_time=0.153, loss_ctc=70.855, loss_att=51.822, acc=0.716, loss=57.532, backward_time=1.033, grad_norm=130.378, clip=100.000, loss_scale=2.805e+32, optim_step_time=0.183, optim0_lr0=5.185e-05, train_time=3.465, time=4 hours, 49 minutes and 0.35 seconds, total_count=450000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.674, cer_ctc=0.250, loss_att=37.042, acc=0.682, cer=0.410, wer=0.996, loss=38.731, time=7 minutes and 58.92 seconds, total_count=46046, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 0.19 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-14 11:17:37,205 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-14 11:17:37,216 (trainer:272) INFO: 49/50epoch started. Estimated time to finish: 10 hours, 12 minutes and 51.12 seconds
+[gpub002:0/64] 2023-07-14 11:17:37,220 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-14 11:17:55,035 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-14 11:17:58,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 11:23:36,393 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.992, forward_time=0.180, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.067, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.157e-05, train_time=7.183
+[gpub002:0/64] 2023-07-14 11:25:52,971 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.338e-04, forward_time=0.145, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.030, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 11:28:10,045 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.411e-04, forward_time=0.145, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.031, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.741
+[gpub002:0/64] 2023-07-14 11:30:33,978 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.382e-04, forward_time=0.143, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.041, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=2.878
+[gpub002:0/64] 2023-07-14 11:33:11,725 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.370e-04, forward_time=0.144, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.047, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=3.155
+[gpub002:0/64] 2023-07-14 11:35:47,703 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.045, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.154e-05, train_time=3.119
+[gpub002:0/64] 2023-07-14 11:38:18,577 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.325e-04, forward_time=0.150, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.040, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.154e-05, train_time=3.017
+[gpub002:0/64] 2023-07-14 11:40:45,549 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.343e-04, forward_time=0.144, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.037, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.153e-05, train_time=2.939
+[gpub002:0/64] 2023-07-14 11:41:43,214 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-14 11:42:01,107 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-14 11:42:04,480 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 11:48:17,184 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=2.782, forward_time=0.203, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.053, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.153e-05, train_time=9.032
+[gpub002:0/64] 2023-07-14 11:50:46,902 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=9.525e-05, forward_time=0.144, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.043, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.994
+[gpub002:0/64] 2023-07-14 11:53:03,014 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=8.920e-05, forward_time=0.144, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.032, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.722
+[gpub002:0/64] 2023-07-14 11:55:18,875 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.102e-04, forward_time=0.143, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.030, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.151e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 11:57:34,399 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.012e-04, forward_time=0.144, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.028, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.150e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 11:59:49,721 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.097e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.027, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.150e-05, train_time=2.706
+[gpub002:0/64] 2023-07-14 12:02:24,426 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=0.010, forward_time=0.243, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.055, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=5.149e-05, train_time=3.093
+[gpub002:0/64] 2023-07-14 12:04:40,243 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, backward_time=1.028, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.149e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 12:06:21,070 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-14 12:06:39,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-14 12:06:42,629 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 12:10:28,549 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=1.995, forward_time=0.145, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.042, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.148e-05, train_time=6.966
+[gpub002:0/64] 2023-07-14 12:12:46,571 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.033, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.148e-05, train_time=2.760
+[gpub002:0/64] 2023-07-14 12:15:04,541 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.031, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.759
+[gpub002:0/64] 2023-07-14 12:17:21,880 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.031, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.747
+[gpub002:0/64] 2023-07-14 12:19:44,196 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.047, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.146e-05, train_time=2.846
+[gpub002:0/64] 2023-07-14 12:21:47,756 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 12:22:28,541 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=2.902e-04, forward_time=0.195, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.122, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.196, optim0_lr0=5.146e-05, train_time=3.286
+[gpub002:0/64] 2023-07-14 12:24:44,739 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.028, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.145e-05, train_time=2.724
+[gpub002:0/64] 2023-07-14 12:27:00,588 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.028, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 12:29:16,288 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=71.254, loss_att=49.884, acc=0.724, loss=56.295, backward_time=1.028, grad_norm=137.105, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 12:29:36,316 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-14 12:29:55,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-14 12:29:58,784 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 12:36:52,242 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=3.018, forward_time=0.183, loss_ctc=77.934, loss_att=56.931, acc=0.709, loss=63.232, backward_time=1.042, grad_norm=160.805, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.143e-05, train_time=9.116
+[gpub002:0/64] 2023-07-14 12:39:09,252 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=76.698, loss_att=56.198, acc=0.715, loss=62.348, backward_time=1.031, grad_norm=159.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.143e-05, train_time=2.743
+[gpub002:0/64] 2023-07-14 12:41:25,308 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.984, loss_att=50.376, acc=0.725, loss=56.558, backward_time=1.031, grad_norm=114.170, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 12:43:41,674 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=80.056, loss_att=64.744, acc=0.711, loss=69.337, backward_time=1.033, grad_norm=127.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.727
+[gpub002:0/64] 2023-07-14 12:45:57,246 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.143e-04, forward_time=0.144, loss_ctc=67.599, loss_att=50.304, acc=0.735, loss=55.492, backward_time=1.027, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 12:48:13,149 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=64.905, loss_att=44.347, acc=0.736, loss=50.514, backward_time=1.030, grad_norm=130.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 12:50:30,467 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=68.429, loss_att=50.737, acc=0.731, loss=56.045, backward_time=1.028, grad_norm=111.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.746
+[gpub002:0/64] 2023-07-14 12:52:46,238 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=62.564, loss_att=44.638, acc=0.725, loss=50.016, backward_time=1.028, grad_norm=110.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 12:53:46,523 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-14 12:54:05,029 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-14 12:54:08,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 12:59:16,516 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.682, forward_time=0.171, loss_ctc=82.313, loss_att=58.467, acc=0.718, loss=65.621, backward_time=1.042, grad_norm=192.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.139e-05, train_time=7.805
+[gpub002:0/64] 2023-07-14 13:02:14,955 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=72.811, loss_att=56.064, acc=0.705, loss=61.088, backward_time=1.086, grad_norm=128.058, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.569
+[gpub002:0/64] 2023-07-14 13:05:10,836 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=70.741, loss_att=51.039, acc=0.720, loss=56.950, backward_time=1.079, grad_norm=144.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.517
+[gpub002:0/64] 2023-07-14 13:07:59,359 (trainer:732) INFO: 49epoch:train:3601-3700batch: iter_time=1.242e-04, forward_time=0.144, loss_ctc=82.023, loss_att=65.980, acc=0.690, loss=70.793, backward_time=1.065, grad_norm=119.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.370
+[gpub002:0/64] 2023-07-14 13:10:33,803 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=68.382, loss_att=49.269, acc=0.731, loss=55.003, backward_time=1.040, grad_norm=119.342, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.089
+[gpub002:0/64] 2023-07-14 13:13:52,522 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.390e-04, forward_time=0.146, loss_ctc=70.071, loss_att=52.760, acc=0.711, loss=57.953, backward_time=1.081, grad_norm=133.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.136e-05, train_time=3.974
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2147805.0 ON gpub002 CANCELLED AT 2023-07-14T13:15:07 DUE TO TIME LIMIT ***