diff --git "a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log" "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log" @@ -0,0 +1,4111 @@ +# Running on gpub001.delta.ncsa.illinois.edu +# Started at Fri Jul 14 13:29:16 CDT 2023 +# SLURMD_NODENAME=gpub001 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2157595 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2157595 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=1052675 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config 
+[gpub001:0/64] 2023-07-14 13:30:20,482 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/64] 2023-07-14 13:30:21,930 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub001:0/64] 2023-07-14 13:30:21,964 (s2t:483) INFO: Vocabulary size: 50002
+[gpub001:0/64] 2023-07-14 13:30:35,251 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0-23): 24 x EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0-2): 3 x DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 888.51 M + Number of trainable parameters: 888.51 M (100.0%) + Size: 3.55 GB + Type: torch.float32 +[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1205) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.00025 + lr: 2.5e-08 + maximize: False + weight_decay: 0.0 +) +[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000) +[gpub001:0/64] 2023-07-14 13:30:35,277 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml +[gpub001:0/64] 2023-07-14 13:30:35,962 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth +[gpub001:0/64] 2023-07-14 13:30:44,311 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1570) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub001:0/64] 2023-07-14 13:30:44,463 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129 +[gpub001:0/64] 2023-07-14 13:30:44,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1570) INFO: [plot_att] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1571) INFO: [plot_att] Batch 
sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1 +[gpub001:0/64] 2023-07-14 13:31:11,236 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth +gpub001:1052798:1052798 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052798:1052798 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052798:1052798 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.14.3+cuda11.7 +[gpub001:0/64] 2023-07-14 13:31:16,544 (trainer:284) INFO: 49/60epoch started +[gpub001:0/64] 2023-07-14 13:31:16,605 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub001:0/64] 2023-07-14 13:31:34,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-14 13:31:37,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +gpub050:2739708:2739708 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:2739708:2739708 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739708:2739708 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739708:2739778 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739708:2739778 [3] NCCL INFO Using network IB +gpub050:2739708:2739778 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:2739708:2739778 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub050:2739708:2739778 [3] NCCL INFO Connected all rings +gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub050:2739708:2739778 [3] NCCL INFO Connected all trees +gpub050:2739708:2739778 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739708:2739778 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739708:2739778 [3] NCCL INFO comm 0x51443e00 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:2739706:2739706 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:2739706:2739706 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739706:2739706 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739706:2739779 [1] NCCL INFO NET/IB : Using 
[0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739706:2739779 [1] NCCL INFO Using network IB +gpub050:2739706:2739779 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:2739706:2739779 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Connected all rings +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub050:2739706:2739779 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub050:2739706:2739779 [1] NCCL INFO Connected all trees +gpub050:2739706:2739779 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739706:2739779 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739706:2739779 [1] NCCL INFO comm 0xb91afa10 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub050:2739707:2739707 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:2739707:2739707 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739707:2739707 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739707:2739777 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739707:2739777 [2] NCCL INFO Using network IB +gpub050:2739707:2739777 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:2739707:2739777 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub050:2739707:2739777 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Connected all rings +gpub050:2739707:2739777 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub050:2739707:2739777 [2] NCCL INFO Connected all trees +gpub050:2739707:2739777 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739707:2739777 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739707:2739777 [2] NCCL INFO comm 0x9490430 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub050:2739705:2739705 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2739705:2739705 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2739705:2739705 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2739705:2739780 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2739705:2739780 [0] NCCL INFO Using network IB +gpub050:2739705:2739780 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2739705:2739780 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC 
+gpub050:2739705:2739780 [0] NCCL INFO Connected all rings +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub050:2739705:2739780 [0] NCCL INFO Connected all trees +gpub050:2739705:2739780 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2739705:2739780 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2739705:2739780 [0] NCCL INFO comm 0xb798f2d0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:587387:587387 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:587387:587387 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587387:587387 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587387:587522 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587387:587522 [1] NCCL INFO Using network IB +gpub078:587387:587522 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:587387:587522 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Connected all rings +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:587387:587522 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:587387:587522 [1] NCCL INFO Connected all trees +gpub078:587387:587522 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587387:587522 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587387:587522 [1] NCCL INFO comm 0x8bcb3990 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:587386:587386 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:587386:587386 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587386:587386 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587386:587521 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587386:587521 [0] NCCL INFO Using network IB +gpub078:587386:587521 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:587386:587521 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:587386:587521 [0] NCCL INFO Connected all rings +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 
: 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub078:587386:587521 [0] NCCL INFO Connected all trees +gpub078:587386:587521 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587386:587521 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587386:587521 [0] NCCL INFO comm 0xb210a550 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:587389:587389 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:587389:587389 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587389:587389 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587389:587520 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587389:587520 [3] NCCL INFO Using network IB +gpub078:587389:587520 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:587389:587520 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub078:587389:587520 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:587389:587520 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:587389:587520 [3] NCCL INFO Connected all rings +gpub078:587389:587520 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:587389:587520 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:587389:587520 [3] NCCL INFO Connected all trees +gpub078:587389:587520 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587389:587520 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587389:587520 [3] NCCL INFO comm 0xf3a7e40 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub002:2311584:2311584 [0] NCCL INFO cudaDriverVersion 12010 +gpub002:2311584:2311584 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2311584:2311584 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311584:2311667 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311584:2311667 [0] NCCL INFO Using network IB +gpub002:2311584:2311667 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub002:2311584:2311667 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub002:2311584:2311667 [0] NCCL INFO Connected all rings +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 12[7000] -> 
4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub002:2311584:2311667 [0] NCCL INFO Connected all trees +gpub002:2311584:2311667 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311584:2311667 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311584:2311667 [0] NCCL INFO comm 0x9d597d00 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub002:2311585:2311585 [1] NCCL INFO cudaDriverVersion 12010 +gpub002:2311585:2311585 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2311585:2311585 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311585:2311664 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311585:2311664 [1] NCCL INFO Using network IB +gpub002:2311585:2311664 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub002:2311585:2311664 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub002:2311585:2311664 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Connected all rings +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub002:2311585:2311664 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub002:2311585:2311664 [1] NCCL INFO Connected all trees +gpub002:2311585:2311664 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311585:2311664 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311585:2311664 [1] NCCL INFO comm 0x9cc6bd40 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2311586:2311586 [2] NCCL INFO cudaDriverVersion 12010 +gpub002:2311586:2311586 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2311586:2311586 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311586:2311666 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311586:2311666 [2] NCCL INFO Using network IB +gpub002:2311586:2311666 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub002:2311586:2311666 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub002:2311586:2311666 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Connected all rings +gpub002:2311586:2311666 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub002:2311586:2311666 [2] NCCL INFO Connected all trees +gpub002:2311586:2311666 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311586:2311666 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311586:2311666 [2] NCCL INFO comm 0x8ee58800 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub002:2311587:2311587 [3] NCCL INFO cudaDriverVersion 12010 +gpub002:2311587:2311587 [3] NCCL INFO Bootstrap : Using 
eth1:172.28.23.102<0> +gpub002:2311587:2311587 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2311587:2311665 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2311587:2311665 [3] NCCL INFO Using network IB +gpub002:2311587:2311665 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub002:2311587:2311665 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub002:2311587:2311665 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub002:2311587:2311665 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub002:2311587:2311665 [3] NCCL INFO Connected all rings +gpub002:2311587:2311665 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub002:2311587:2311665 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub002:2311587:2311665 [3] NCCL INFO Connected all trees +gpub002:2311587:2311665 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2311587:2311665 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2311587:2311665 [3] NCCL INFO comm 0x4fabf1f0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:587388:587388 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:587388:587388 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:587388:587388 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:587388:587523 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:587388:587523 [2] NCCL INFO Using network IB +gpub078:587388:587523 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:587388:587523 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub078:587388:587523 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Connected all rings +gpub078:587388:587523 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:587388:587523 [2] NCCL INFO Connected all trees +gpub078:587388:587523 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:587388:587523 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:587388:587523 [2] NCCL INFO comm 0x4f14aa50 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3424421:3424421 [3] NCCL INFO cudaDriverVersion 12010 +gpub051:3424421:3424421 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424421:3424421 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424421:3424552 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424421:3424552 [3] NCCL INFO Using network IB +gpub051:3424421:3424552 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:3424421:3424552 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub051:3424421:3424552 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub051:3424421:3424552 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub051:3424421:3424552 [3] NCCL INFO Connected all rings +gpub051:3424421:3424552 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub051:3424421:3424552 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC 
+gpub051:3424421:3424552 [3] NCCL INFO Connected all trees +gpub051:3424421:3424552 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424421:3424552 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424421:3424552 [3] NCCL INFO comm 0x9ee4290 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:3424419:3424419 [1] NCCL INFO cudaDriverVersion 12010 +gpub051:3424419:3424419 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424419:3424419 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424419:3424553 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424419:3424553 [1] NCCL INFO Using network IB +gpub051:3424419:3424553 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub051:3424419:3424553 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub051:3424419:3424553 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Connected all rings +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub051:3424419:3424553 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub051:3424419:3424553 [1] NCCL INFO Connected all trees +gpub051:3424419:3424553 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424419:3424553 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424419:3424553 [1] NCCL INFO comm 0xb60902d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub051:3424418:3424418 [0] NCCL INFO cudaDriverVersion 12010 +gpub051:3424418:3424418 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424418:3424418 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424418:3424555 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424418:3424555 [0] NCCL INFO Using network IB +gpub051:3424418:3424555 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub051:3424418:3424555 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub051:3424418:3424555 [0] NCCL INFO Connected all rings +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3424418:3424555 [0] NCCL INFO Connected 
all trees +gpub051:3424418:3424555 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424418:3424555 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424418:3424555 [0] NCCL INFO comm 0x8ebe3540 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub051:3424420:3424420 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:3424420:3424420 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3424420:3424420 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3424420:3424554 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3424420:3424554 [2] NCCL INFO Using network IB +gpub051:3424420:3424554 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:3424420:3424554 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub051:3424420:3424554 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Connected all rings +gpub051:3424420:3424554 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub051:3424420:3424554 [2] NCCL INFO Connected all trees +gpub051:3424420:3424554 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3424420:3424554 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3424420:3424554 [2] NCCL INFO comm 0x4f403790 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub027:4034780:4034780 [1] NCCL INFO cudaDriverVersion 12010 +gpub027:4034780:4034780 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034780:4034780 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034780:4034863 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034780:4034863 [1] NCCL INFO Using network IB +gpub027:4034780:4034863 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub027:4034780:4034863 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub027:4034780:4034863 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Connected all rings +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub027:4034780:4034863 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub027:4034780:4034863 [1] NCCL INFO Connected all trees +gpub027:4034780:4034863 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034780:4034863 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034780:4034863 [1] NCCL INFO comm 0x9afc8490 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub027:4034779:4034779 [0] NCCL INFO cudaDriverVersion 12010 +gpub027:4034779:4034779 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034779:4034779 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034779:4034862 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB 
eth1:172.28.23.127<0> +gpub027:4034779:4034862 [0] NCCL INFO Using network IB +gpub027:4034779:4034862 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:4034779:4034862 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Connected all rings +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Connected all trees +gpub027:4034779:4034862 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034779:4034862 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034779:4034862 [0] NCCL INFO comm 0xb5e6b1f0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:4034782:4034782 [3] NCCL INFO cudaDriverVersion 12010 +gpub027:4034782:4034782 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034782:4034782 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034782:4034861 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034782:4034861 [3] NCCL INFO Using network IB +gpub027:4034782:4034861 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:4034782:4034861 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Connected all rings +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Connected all trees +gpub027:4034782:4034861 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034782:4034861 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034782:4034861 [3] NCCL INFO comm 0x4f996350 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:4034781:4034781 [2] NCCL INFO cudaDriverVersion 12010 +gpub027:4034781:4034781 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034781:4034781 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034781:4034864 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034781:4034864 [2] NCCL INFO Using network IB +gpub027:4034781:4034864 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:4034781:4034864 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 
31/-1/-1->30->29 +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all rings +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all trees +gpub027:4034781:4034864 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034781:4034864 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034781:4034864 [2] NCCL INFO comm 0x8d940630 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub077:2521721:2521721 [3] NCCL INFO cudaDriverVersion 12010 +gpub077:2521721:2521721 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521721:2521721 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521721:2521790 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521721:2521790 [3] NCCL INFO Using network IB +gpub077:2521721:2521790 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub077:2521721:2521790 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Connected all rings +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Connected all trees +gpub077:2521721:2521790 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521721:2521790 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521721:2521790 [3] NCCL INFO comm 0x500bb780 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub079:3396495:3396495 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:3396495:3396495 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396495:3396495 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396495:3396576 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396495:3396576 [2] NCCL INFO Using network IB +gpub079:3396495:3396576 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:3396495:3396576 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all rings +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all trees +gpub079:3396495:3396576 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396495:3396576 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396495:3396576 [2] NCCL INFO comm 0x8e939be0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859755:1859755 [3] NCCL INFO cudaDriverVersion 12010 
+gpub006:1859755:1859755 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859755:1859755 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859755:1859833 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859755:1859833 [3] NCCL INFO Using network IB +gpub006:1859755:1859833 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub006:1859755:1859833 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Connected all rings +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Connected all trees +gpub006:1859755:1859833 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859755:1859833 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859755:1859833 [3] NCCL INFO comm 0x50847890 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277663:277663 [1] NCCL INFO cudaDriverVersion 12010 +gpub049:277663:277663 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277663:277663 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277663:277744 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277663:277744 [1] NCCL INFO Using network IB +gpub049:277663:277744 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub049:277663:277744 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all rings +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all trees +gpub049:277663:277744 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277663:277744 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277663:277744 [1] NCCL INFO comm 0xb77e62d0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396496:3396496 [3] NCCL INFO cudaDriverVersion 12010 +gpub079:3396496:3396496 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396496:3396496 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396496:3396574 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396496:3396574 [3] NCCL INFO Using network IB +gpub079:3396496:3396574 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:3396496:3396574 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] 
[send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Connected all rings +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Connected all trees +gpub079:3396496:3396574 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396496:3396574 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396496:3396574 [3] NCCL INFO comm 0x51317510 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:4055358:4055358 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:4055358:4055358 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055358:4055358 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055358:4055429 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055358:4055429 [2] NCCL INFO Using network IB +gpub074:4055358:4055429 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:4055358:4055429 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all rings +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub049:277665:277665 [3] NCCL INFO cudaDriverVersion 12010 +gpub049:277665:277665 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277665:277665 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277665:277743 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277665:277743 [3] NCCL INFO Using network IB +gpub049:277665:277743 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub049:277665:277743 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Connected all rings +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all trees +gpub074:4055358:4055429 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055358:4055429 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055358:4055429 [2] NCCL INFO comm 0x5076e6a0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277665:277743 [3] NCCL INFO Connected all trees +gpub049:277665:277743 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277665:277743 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277665:277743 [3] NCCL INFO comm 0x9d1a0c70 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277662:277662 [0] NCCL INFO cudaDriverVersion 12010 +gpub049:277662:277662 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277662:277662 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub049:277662:277745 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277662:277745 [0] NCCL INFO Using network IB +gpub049:277662:277745 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub049:277662:277745 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Connected all rings +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Connected all trees +gpub049:277662:277745 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277662:277745 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277662:277745 [0] NCCL INFO comm 0x50033560 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:4055359:4055359 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:4055359:4055359 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055359:4055359 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055359:4055428 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055359:4055428 [3] NCCL INFO Using network IB +gpub074:4055359:4055428 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:4055359:4055428 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Connected all rings +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Connected all trees +gpub074:4055359:4055428 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055359:4055428 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055359:4055428 [3] NCCL INFO comm 0xb59ce3d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521719:2521719 [1] NCCL INFO cudaDriverVersion 12010 +gpub077:2521719:2521719 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521719:2521719 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521719:2521792 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521719:2521792 [1] NCCL INFO Using network IB +gpub077:2521719:2521792 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub077:2521719:2521792 [1] NCCL 
INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all rings +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all trees +gpub077:2521719:2521792 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521719:2521792 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521719:2521792 [1] NCCL INFO comm 0xb802530 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396493:3396493 [0] NCCL INFO cudaDriverVersion 12010 +gpub079:3396493:3396493 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396493:3396493 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396493:3396573 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396493:3396573 [0] NCCL INFO Using network IB +gpub079:3396493:3396573 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub079:3396493:3396573 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Connected all rings +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Connected all trees +gpub079:3396493:3396573 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396493:3396573 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396493:3396573 [0] NCCL INFO comm 0x4f9d83d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:2781297:2781297 [1] NCCL INFO cudaDriverVersion 12010 +gpub026:2781297:2781297 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781297:2781297 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781297:2781382 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781297:2781382 [1] NCCL INFO Using network IB +gpub026:2781297:2781382 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub026:2781297:2781382 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC 
+gpub026:2781297:2781382 [1] NCCL INFO Connected all rings +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Connected all trees +gpub026:2781297:2781382 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781297:2781382 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781297:2781382 [1] NCCL INFO comm 0x50ca5540 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub006:1859754:1859754 [2] NCCL INFO cudaDriverVersion 12010 +gpub006:1859754:1859754 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859754:1859754 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859754:1859834 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859754:1859834 [2] NCCL INFO Using network IB +gpub006:1859754:1859834 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub006:1859754:1859834 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all rings +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all trees +gpub006:1859754:1859834 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859754:1859834 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859754:1859834 [2] NCCL INFO comm 0xa42aef0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055357:4055357 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:4055357:4055357 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055357:4055357 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055357:4055427 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055357:4055427 [1] NCCL INFO Using network IB +gpub074:4055357:4055427 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:4055357:4055427 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all rings +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all trees +gpub074:4055357:4055427 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055357:4055427 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels 
per peer +gpub074:4055357:4055427 [1] NCCL INFO comm 0x8b30550 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396494:3396494 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:3396494:3396494 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396494:3396494 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396494:3396575 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396494:3396575 [1] NCCL INFO Using network IB +gpub079:3396494:3396575 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:3396494:3396575 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all rings +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all trees +gpub079:3396494:3396575 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396494:3396575 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396494:3396575 [1] NCCL INFO comm 0xc163a90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:2781299:2781299 [3] NCCL INFO cudaDriverVersion 12010 +gpub026:2781299:2781299 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781299:2781299 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781299:2781379 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781299:2781379 [3] NCCL INFO Using network IB +gpub026:2781299:2781379 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:2781299:2781379 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Connected all rings +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Connected all trees +gpub026:2781299:2781379 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781299:2781379 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781299:2781379 [3] NCCL INFO comm 0x507c61a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521720:2521720 [2] NCCL INFO cudaDriverVersion 12010 +gpub077:2521720:2521720 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521720:2521720 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521720:2521791 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521720:2521791 [2] NCCL INFO Using network IB +gpub077:2521720:2521791 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub077:2521720:2521791 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 
54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all rings +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all trees +gpub077:2521720:2521791 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521720:2521791 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521720:2521791 [2] NCCL INFO comm 0xa4c559d0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859752:1859752 [0] NCCL INFO cudaDriverVersion 12010 +gpub006:1859752:1859752 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859752:1859752 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859752:1859836 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859752:1859836 [0] NCCL INFO Using network IB +gpub006:1859752:1859836 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub006:1859752:1859836 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Connected all rings +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Connected all trees +gpub006:1859752:1859836 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859752:1859836 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859752:1859836 [0] NCCL INFO comm 0x50278a40 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990364:2990364 [0] NCCL INFO cudaDriverVersion 12010 +gpub008:2990364:2990364 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990364:2990364 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990364:2990440 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990364:2990440 [0] NCCL INFO Using network IB +gpub008:2990364:2990440 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:2990364:2990440 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] 
NCCL INFO Connected all rings +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Connected all trees +gpub008:2990364:2990440 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990364:2990440 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990364:2990440 [0] NCCL INFO comm 0xa229210 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub077:2521718:2521718 [0] NCCL INFO cudaDriverVersion 12010 +gpub077:2521718:2521718 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521718:2521718 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521718:2521789 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521718:2521789 [0] NCCL INFO Using network IB +gpub077:2521718:2521789 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub077:2521718:2521789 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Connected all rings +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Connected all trees +gpub077:2521718:2521789 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521718:2521789 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521718:2521789 [0] NCCL INFO comm 0x5162ad90 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990366:2990366 [2] NCCL INFO cudaDriverVersion 12010 +gpub008:2990366:2990366 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990366:2990366 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990366:2990443 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990366:2990443 [2] NCCL INFO Using network IB +gpub008:2990366:2990443 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:2990366:2990443 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 
14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all rings +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all trees +gpub008:2990366:2990443 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990366:2990443 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990366:2990443 [2] NCCL INFO comm 0xb6d2c880 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055356:4055356 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:4055356:4055356 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055356:4055356 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055356:4055430 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055356:4055430 [0] NCCL INFO Using network IB +gpub074:4055356:4055430 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:4055356:4055430 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Connected all rings +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Connected all trees +gpub074:4055356:4055430 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055356:4055430 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055356:4055430 [0] NCCL INFO comm 0x9c0ae50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub006:1859753:1859753 [1] NCCL INFO cudaDriverVersion 12010 +gpub006:1859753:1859753 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859753:1859753 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859753:1859835 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859753:1859835 [1] NCCL INFO Using network IB +gpub006:1859753:1859835 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub006:1859753:1859835 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all rings +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 4[7000] -> 
9[46000] [receive] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all trees +gpub006:1859753:1859835 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859753:1859835 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859753:1859835 [1] NCCL INFO comm 0xa3eb5f0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052799:1052799 [1] NCCL INFO cudaDriverVersion 12010 +gpub001:1052799:1052799 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052799:1052799 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052799:1052880 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052799:1052880 [1] NCCL INFO Using network IB +gpub001:1052799:1052880 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub001:1052799:1052880 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all rings +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all trees +gpub001:1052799:1052880 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052799:1052880 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052799:1052880 [1] NCCL INFO comm 0x50befe70 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052801:1052801 [3] NCCL INFO cudaDriverVersion 12010 +gpub001:1052801:1052801 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052801:1052801 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052801:1052879 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052801:1052879 [3] NCCL INFO Using network IB +gpub001:1052801:1052879 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub001:1052801:1052879 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Connected all rings +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Connected all trees +gpub001:1052801:1052879 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052801:1052879 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052801:1052879 [3] NCCL INFO comm 0xb78dc020 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781298:2781298 [2] NCCL INFO cudaDriverVersion 12010 +gpub026:2781298:2781298 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781298:2781298 [2] NCCL INFO NET/Plugin : No plugin found 
(libnccl-net.so), using internal implementation +gpub026:2781298:2781380 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781298:2781380 [2] NCCL INFO Using network IB +gpub026:2781298:2781380 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:2781298:2781380 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Connected all rings +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:277664:277664 [2] NCCL INFO cudaDriverVersion 12010 +gpub049:277664:277664 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277664:277664 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277664:277742 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277664:277742 [2] NCCL INFO Using network IB +gpub049:277664:277742 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub049:277664:277742 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all rings +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781298:2781380 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781298:2781380 [2] NCCL INFO comm 0x8e36d550 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277664:277742 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277664:277742 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277664:277742 [2] NCCL INFO comm 0x92096c0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub008:2990367:2990367 [3] NCCL INFO cudaDriverVersion 12010 +gpub008:2990367:2990367 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990367:2990367 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990367:2990442 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990367:2990442 [3] NCCL INFO Using network IB +gpub008:2990367:2990442 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:2990367:2990442 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Connected all rings +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Connected all trees +gpub008:2990367:2990442 [3] NCCL 
INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990367:2990442 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990367:2990442 [3] NCCL INFO comm 0x4fa470f0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub001:1052798:1052882 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052798:1052882 [0] NCCL INFO Using network IB +gpub001:1052798:1052882 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Connected all rings +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Connected all trees +gpub001:1052798:1052882 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052798:1052882 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052798:1052882 [0] NCCL INFO comm 0x50dde690 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106667:106667 [1] NCCL INFO cudaDriverVersion 12010 +gpub022:106667:106667 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106667:106667 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106667:106747 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106667:106747 [1] NCCL INFO Using network IB +gpub022:106667:106747 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:106667:106747 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all rings +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all trees +gpub022:106667:106747 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106667:106747 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106667:106747 [1] NCCL INFO comm 0x8f3e3330 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2990365:2990365 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2990365:2990365 [1] 
NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990365:2990365 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990365:2990441 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990365:2990441 [1] NCCL INFO Using network IB +gpub008:2990365:2990441 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2990365:2990441 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all rings +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all trees +gpub008:2990365:2990441 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990365:2990441 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990365:2990441 [1] NCCL INFO comm 0x98eccf0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541987:541987 [2] NCCL INFO cudaDriverVersion 12010 +gpub024:541987:541987 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541987:541987 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541987:542066 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541987:542066 [2] NCCL INFO Using network IB +gpub024:541987:542066 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:541987:542066 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all rings +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all trees +gpub024:541987:542066 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541987:542066 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541987:542066 [2] NCCL INFO comm 0x505ff970 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:1052800:1052800 [2] NCCL INFO cudaDriverVersion 12010 +gpub001:1052800:1052800 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052800:1052800 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052800:1052881 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052800:1052881 [2] NCCL INFO Using network IB +gpub001:1052800:1052881 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub001:1052800:1052881 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL 
INFO Connected all rings +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Connected all trees +gpub001:1052800:1052881 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052800:1052881 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052800:1052881 [2] NCCL INFO comm 0x8e66c510 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub022:106669:106669 [3] NCCL INFO cudaDriverVersion 12010 +gpub022:106669:106669 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106669:106669 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106669:106748 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106669:106748 [3] NCCL INFO Using network IB +gpub022:106669:106748 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:106669:106748 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Connected all rings +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Connected all trees +gpub022:106669:106748 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106669:106748 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106669:106748 [3] NCCL INFO comm 0x4f1d7190 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781296:2781296 [0] NCCL INFO cudaDriverVersion 12010 +gpub026:2781296:2781296 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781296:2781296 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781296:2781381 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781296:2781381 [0] NCCL INFO Using network IB +gpub026:2781296:2781381 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:2781296:2781381 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Connected all rings +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Connected all trees 
+gpub026:2781296:2781381 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781296:2781381 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781296:2781381 [0] NCCL INFO comm 0xaebd9cd0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:541985:541985 [0] NCCL INFO cudaDriverVersion 12010 +gpub024:541985:541985 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541985:541985 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541985:542068 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541985:542068 [0] NCCL INFO Using network IB +gpub024:541985:542068 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:541985:542068 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Connected all rings +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Connected all trees +gpub024:541985:542068 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541985:542068 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541985:542068 [0] NCCL INFO comm 0x4ffe64c0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106668:106668 [2] NCCL INFO cudaDriverVersion 12010 +gpub022:106668:106668 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106668:106668 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106668:106749 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106668:106749 [2] NCCL INFO Using network IB +gpub022:106668:106749 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:106668:106749 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all rings +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all trees +gpub022:106668:106749 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106668:106749 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106668:106749 [2] NCCL INFO comm 0x4fd27690 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:541988:541988 [3] NCCL 
INFO cudaDriverVersion 12010 +gpub024:541988:541988 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541988:541988 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541988:542067 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541988:542067 [3] NCCL INFO Using network IB +gpub024:541988:542067 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub024:541988:542067 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Connected all rings +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Connected all trees +gpub024:541988:542067 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541988:542067 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541988:542067 [3] NCCL INFO comm 0xb65fe8d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933786:3933786 [3] NCCL INFO cudaDriverVersion 12010 +gpub048:3933786:3933786 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933786:3933786 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933786:3933849 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933786:3933849 [3] NCCL INFO Using network IB +gpub048:3933786:3933849 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub048:3933786:3933849 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Connected all rings +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Connected all trees +gpub048:3933786:3933849 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933786:3933849 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933786:3933849 [3] NCCL INFO comm 0x8e08e110 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933785:3933785 [2] NCCL INFO cudaDriverVersion 12010 +gpub048:3933785:3933785 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933785:3933785 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933785:3933846 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933785:3933846 [2] NCCL INFO Using network IB +gpub048:3933785:3933846 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub048:3933785:3933846 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all rings +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via 
P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all trees +gpub048:3933785:3933846 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933785:3933846 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933785:3933846 [2] NCCL INFO comm 0xb9dce190 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub048:3933784:3933784 [1] NCCL INFO cudaDriverVersion 12010 +gpub048:3933784:3933784 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933784:3933784 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933784:3933848 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933784:3933848 [1] NCCL INFO Using network IB +gpub048:3933784:3933848 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub048:3933784:3933848 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all rings +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all trees +gpub048:3933784:3933848 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933784:3933848 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933784:3933848 [1] NCCL INFO comm 0x9d3ee1d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541986:541986 [1] NCCL INFO cudaDriverVersion 12010 +gpub024:541986:541986 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541986:541986 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541986:542065 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541986:542065 [1] NCCL INFO Using network IB +gpub024:541986:542065 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:541986:542065 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all rings +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all trees +gpub024:541986:542065 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541986:542065 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541986:542065 [1] NCCL INFO comm 0x8c61ca80 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub048:3933783:3933783 [0] NCCL INFO 
cudaDriverVersion 12010 +gpub048:3933783:3933783 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933783:3933783 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933783:3933847 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933783:3933847 [0] NCCL INFO Using network IB +gpub048:3933783:3933847 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub048:3933783:3933847 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Connected all rings +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Connected all trees +gpub048:3933783:3933847 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933783:3933847 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933783:3933847 [0] NCCL INFO comm 0x8d070d10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106666:106666 [0] NCCL INFO cudaDriverVersion 12010 +gpub022:106666:106666 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106666:106666 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106666:106746 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106666:106746 [0] NCCL INFO Using network IB +gpub022:106666:106746 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:106666:106746 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Connected all rings +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Connected all trees 
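The NCCL INFO lines here come from bringing up a single 64-rank communicator (16 nodes x 4 A40 GPUs): each rank reports its ring/tree peers, reaches GPUs on the same node via P2P/IPC, crosses nodes via NET/IB (RoCE on mlx5_0), and finishes with "Init COMPLETE". For reference, a minimal sketch of the kind of script that produces this output when launched under srun with NCCL_DEBUG=INFO; the environment-variable plumbing is an assumption (this run actually forks 4 workers per srun task via --multiprocessing_distributed), not code from this job:

    # nccl_smoke_test.py -- hypothetical illustration, not part of this experiment
    import os
    import torch
    import torch.distributed as dist

    # Assumes one process per GPU launched directly by srun.
    rank = int(os.environ["SLURM_PROCID"])
    world_size = int(os.environ["SLURM_NTASKS"])
    local_rank = int(os.environ["SLURM_LOCALID"])

    torch.cuda.set_device(local_rank)
    # file:// rendezvous mirrors the --dist_init_method used by this run.
    dist.init_process_group("nccl", init_method="file:///tmp/nccl_init",
                            rank=rank, world_size=world_size)

    x = torch.ones(1, device="cuda")
    dist.all_reduce(x)  # the first collective triggers the ring/tree setup logged above
    assert x.item() == world_size
    dist.destroy_process_group()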
+gpub022:106666:106746 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:106666:106746 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:106666:106746 [0] NCCL INFO comm 0x4ef16f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
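Every rank emits this reducer.cpp warning: find_unused_parameters=True tells DDP to traverse the autograd graph after each forward pass looking for parameters that received no gradient, which costs time even when, as here, every parameter turns out to be used. A minimal sketch of the constructor flag in question, with a toy module standing in for the actual S2T transformer (assumes an already-initialized process group, e.g. as in the sketch above):

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    model = torch.nn.Linear(8, 8).cuda()  # stand-in for the real encoder-decoder
    ddp_model = DDP(
        model,
        device_ids=[torch.cuda.current_device()],  # one GPU per process, as in this job
        find_unused_parameters=True,  # the flag the warning is about; set False when
                                      # the forward pass always uses every parameter
    )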
+[gpub001:0/64] 2023-07-14 13:38:31,549 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.254, forward_time=0.216, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.042, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.157e-05, train_time=8.698
+[gpub001:0/64] 2023-07-14 13:40:47,428 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.324e-04, forward_time=0.143, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.027, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 13:43:04,326 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.368e-04, forward_time=0.144, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.034, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 13:45:19,908 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.152e-04, forward_time=0.140, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.023, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.155e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 13:47:38,398 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.057e-04, forward_time=0.140, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.028, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.155e-05, train_time=2.770
+[gpub001:0/64] 2023-07-14 13:49:52,942 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.077e-04, forward_time=0.139, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.019, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.691
+[gpub001:0/64] 2023-07-14 13:52:16,838 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.205e-04, forward_time=0.142, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.035, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.878
+[gpub001:0/64] 2023-07-14 13:54:35,492 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.298e-04, forward_time=0.142, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.026, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.153e-05, train_time=2.773
+[gpub001:0/64] 2023-07-14 13:55:27,347 (multiple_iter_factory:32) INFO: Building 1st iter-factory...
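In these trainer:732 lines, loss is the hybrid CTC/attention interpolation of loss_ctc and loss_att; the logged values are consistent with a CTC weight of 0.3 (e.g. 0.3 * 75.424 + 0.7 * 56.205 = 61.970). A quick check; the weight is inferred from the numbers, since the training yaml itself is not reproduced in this log:

    # ctc_weight=0.3 is an inference from the logged values, not a config fact.
    ctc_weight = 0.3
    for loss_ctc, loss_att, loss in [(75.424, 56.205, 61.970),
                                     (78.109, 58.369, 64.291),
                                     (71.274, 53.833, 59.065)]:
        combined = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
        assert abs(combined - loss) < 5e-3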
+[gpub001:0/64] 2023-07-14 13:55:45,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:55:48,387 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 13:55:48,388 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-14 13:55:48,394 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:01:54,878 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=1.305, forward_time=0.165, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.037, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.153e-05, train_time=8.787
+[gpub001:0/64] 2023-07-14 14:04:11,115 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.143, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.026, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.725
+[gpub001:0/64] 2023-07-14 14:06:26,981 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.025, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 14:08:43,048 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.142, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.027, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.151e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 14:10:58,405 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.299e-04, forward_time=0.143, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.023, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.150e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 14:13:13,436 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.351e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.022, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.150e-05, train_time=2.700
+[gpub001:0/64] 2023-07-14 14:15:28,715 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=1.272e-04, forward_time=0.143, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.023, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.149e-05, train_time=2.705
+[gpub001:0/64] 2023-07-14 14:17:44,041 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.310e-04, forward_time=0.143, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, backward_time=1.024, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.149e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 14:19:28,411 (multiple_iter_factory:32) INFO: Building 2nd iter-factory...
+[gpub001:0/64] 2023-07-14 14:19:46,106 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-14 14:19:49,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:24:53,336 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=2.398, forward_time=0.158, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.039, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.148e-05, train_time=8.586
+[gpub001:0/64] 2023-07-14 14:27:11,824 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.032, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.148e-05, train_time=2.770
+[gpub001:0/64] 2023-07-14 14:29:27,834 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.026, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.181, optim0_lr0=5.147e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 14:31:43,989 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.183e-04, forward_time=0.144, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.028, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.147e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:34:00,002 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.026, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 14:35:34,874 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
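The trainer:663 warning marks a skipped update: with fp16 training at a loss scale around 3e+32, a gradient overflow makes the unscaled norm non-finite, and the step is dropped instead of applied. A hand-rolled approximation of that guard (a sketch of the behavior, not ESPnet's actual trainer code; clip=100.000 in the lines above corresponds to max_norm):

    import torch

    def clip_and_step(model, optimizer, max_norm=100.0):
        # clip_grad_norm_ returns the total gradient norm (possibly nan/inf).
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        if not torch.isfinite(grad_norm):
            # "The grad norm is nan. Skipping updating the model."
            optimizer.zero_grad()
            return None
        optimizer.step()
        optimizer.zero_grad()
        return float(grad_norm)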
+[gpub001:0/64] 2023-07-14 14:36:15,530 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.025, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 14:38:31,109 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.025, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.145e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 14:40:46,863 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.025, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 14:43:02,330 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=71.274, loss_att=49.885, acc=0.724, loss=56.302, backward_time=1.024, grad_norm=140.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 14:43:03,512 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 14:43:21,506 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 14:43:24,985 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:50:14,455 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=1.236, forward_time=0.144, loss_ctc=77.943, loss_att=56.932, acc=0.709, loss=63.235, backward_time=1.048, grad_norm=215.635, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.143e-05, train_time=8.642
+[gpub001:0/64] 2023-07-14 14:52:30,604 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=76.520, loss_att=56.178, acc=0.714, loss=62.281, backward_time=1.026, grad_norm=138.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.143e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:54:46,489 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.928, loss_att=50.393, acc=0.725, loss=56.554, backward_time=1.025, grad_norm=113.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.142e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 14:57:02,630 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=80.053, loss_att=64.710, acc=0.711, loss=69.313, backward_time=1.028, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.142e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:59:17,960 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.280e-04, forward_time=0.143, loss_ctc=67.583, loss_att=50.322, acc=0.734, loss=55.500, backward_time=1.023, grad_norm=118.809, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:01:33,622 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.303e-04, forward_time=0.146, loss_ctc=64.848, loss_att=44.323, acc=0.737, loss=50.480, backward_time=1.024, grad_norm=131.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:03:48,945 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.493, loss_att=50.724, acc=0.731, loss=56.054, backward_time=1.022, grad_norm=139.131, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:04,272 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.350e-04, forward_time=0.144, loss_ctc=62.583, loss_att=44.592, acc=0.725, loss=49.990, backward_time=1.023, grad_norm=116.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:50,474 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 15:07:08,807 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:07:12,196 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:07:12,197 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 15:07:12,203 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:12:51,478 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.286, forward_time=0.144, loss_ctc=82.374, loss_att=58.429, acc=0.718, loss=65.612, backward_time=1.042, grad_norm=161.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.139e-05, train_time=8.144
+[gpub001:0/64] 2023-07-14 15:15:09,454 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.225e-04, forward_time=0.143, loss_ctc=72.824, loss_att=56.074, acc=0.705, loss=61.099, backward_time=1.027, grad_norm=124.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.759
+[gpub001:0/64] 2023-07-14 15:16:18,542 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 15:17:26,359 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=70.789, loss_att=51.064, acc=0.720, loss=56.982, backward_time=1.025, grad_norm=127.943, clip=100.000, loss_scale=2.417e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 15:19:44,415 (trainer:732) INFO: 49epoch:train:3601-3700batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=81.997, loss_att=65.961, acc=0.690, loss=70.771, backward_time=1.027, grad_norm=125.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 15:22:04,902 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.446, loss_att=49.301, acc=0.730, loss=55.044, backward_time=1.028, grad_norm=115.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.810
+[gpub001:0/64] 2023-07-14 15:24:23,287 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=70.058, loss_att=52.734, acc=0.711, loss=57.931, backward_time=1.024, grad_norm=135.739, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.767
+[gpub001:0/64] 2023-07-14 15:26:40,552 (trainer:732) INFO: 49epoch:train:3901-4000batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=63.341, loss_att=45.010, acc=0.729, loss=50.510, backward_time=1.027, grad_norm=115.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.745
+[gpub001:0/64] 2023-07-14 15:28:56,692 (trainer:732) INFO: 49epoch:train:4001-4100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=67.558, loss_att=49.060, acc=0.721, loss=54.609, backward_time=1.024, grad_norm=113.122, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 15:30:27,503 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 15:30:45,587 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-14 15:30:49,047 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:35:52,270 (trainer:732) INFO: 49epoch:train:4101-4200batch: iter_time=1.261, forward_time=0.144, loss_ctc=70.221, loss_att=53.085, acc=0.717, loss=58.226, backward_time=1.035, grad_norm=140.521, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=8.311
+[gpub001:0/64] 2023-07-14 15:38:08,843 (trainer:732) INFO: 49epoch:train:4201-4300batch: iter_time=1.161e-04, forward_time=0.144, loss_ctc=73.382, loss_att=57.566, acc=0.707, loss=62.311, backward_time=1.029, grad_norm=116.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.134e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 15:40:24,503 (trainer:732) INFO: 49epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=72.437, loss_att=50.993, acc=0.718, loss=57.426, backward_time=1.025, grad_norm=135.729, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.134e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:42:39,948 (trainer:732) INFO: 49epoch:train:4401-4500batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=79.101, loss_att=66.083, acc=0.685, loss=69.989, backward_time=1.023, grad_norm=131.828, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 15:44:55,456 (trainer:732) INFO: 49epoch:train:4501-4600batch: iter_time=1.249e-04, forward_time=0.143, loss_ctc=67.710, loss_att=48.912, acc=0.733, loss=54.551, backward_time=1.023, grad_norm=151.097, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 15:47:10,473 (trainer:732) INFO: 49epoch:train:4601-4700batch: iter_time=1.201e-04, forward_time=0.142, loss_ctc=67.960, loss_att=47.417, acc=0.714, loss=53.580, backward_time=1.021, grad_norm=139.328, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.132e-05, train_time=2.700
+[gpub001:0/64] 2023-07-14 15:49:28,348 (trainer:732) INFO: 49epoch:train:4701-4800batch: iter_time=1.428e-04, forward_time=0.143, loss_ctc=66.335, loss_att=46.579, acc=0.730, loss=52.506, backward_time=1.025, grad_norm=121.960, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.757
+[gpub001:0/64] 2023-07-14 15:51:43,745 (trainer:732) INFO: 49epoch:train:4801-4900batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=65.075, loss_att=47.591, acc=0.717, loss=52.836, backward_time=1.024, grad_norm=142.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 15:53:59,339 (trainer:732) INFO: 49epoch:train:4901-5000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=76.840, loss_att=55.520, acc=0.720, loss=61.916, backward_time=1.025, grad_norm=146.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 15:54:01,002 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 15:54:18,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-14 15:54:22,677 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:02:02,572 (trainer:732) INFO: 49epoch:train:5001-5100batch: iter_time=1.246, forward_time=0.173, loss_ctc=70.620, loss_att=55.070, acc=0.710, loss=59.735, backward_time=1.102, grad_norm=143.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.130e-05, train_time=9.664
+[gpub001:0/64] 2023-07-14 16:05:05,429 (trainer:732) INFO: 49epoch:train:5101-5200batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=74.968, loss_att=53.708, acc=0.725, loss=60.086, backward_time=1.111, grad_norm=140.062, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.657
+[gpub001:0/64] 2023-07-14 16:07:39,205 (trainer:732) INFO: 49epoch:train:5201-5300batch: iter_time=1.230e-04, forward_time=0.143, loss_ctc=78.360, loss_att=62.863, acc=0.701, loss=67.512, backward_time=1.045, grad_norm=129.656, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.075
+[gpub001:0/64] 2023-07-14 16:10:30,819 (trainer:732) INFO: 49epoch:train:5301-5400batch: iter_time=1.198e-04, forward_time=0.143, loss_ctc=70.380, loss_att=52.634, acc=0.739, loss=57.958, backward_time=1.065, grad_norm=145.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.128e-05, train_time=3.432
+[gpub001:0/64] 2023-07-14 16:13:06,699 (trainer:732) INFO: 49epoch:train:5401-5500batch: iter_time=1.248e-04, forward_time=0.144, loss_ctc=66.159, loss_att=49.037, acc=0.727, loss=54.174, backward_time=1.050, grad_norm=127.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.128e-05, train_time=3.117
+[gpub001:0/64] 2023-07-14 16:15:41,679 (trainer:732) INFO: 49epoch:train:5501-5600batch: iter_time=1.175e-04, forward_time=0.143, loss_ctc=63.480, loss_att=42.575, acc=0.741, loss=48.847, backward_time=1.045, grad_norm=114.198, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=3.099
+[gpub001:0/64] 2023-07-14 16:18:02,778 (trainer:732) INFO: 49epoch:train:5601-5700batch: iter_time=1.207e-04, forward_time=0.143, loss_ctc=71.660, loss_att=53.814, acc=0.721, loss=59.168, backward_time=1.031, grad_norm=134.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=2.822
+[gpub001:0/64] 2023-07-14 16:20:31,593 (trainer:732) INFO: 49epoch:train:5701-5800batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.301, loss_att=52.766, acc=0.725, loss=58.326, backward_time=1.041, grad_norm=137.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=2.976
+[gpub001:0/64] 2023-07-14 16:21:30,492 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 16:21:48,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 16:21:52,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:27:57,645 (trainer:732) INFO: 49epoch:train:5801-5900batch: iter_time=1.522, forward_time=0.161, loss_ctc=73.115, loss_att=49.116, acc=0.724, loss=56.316, backward_time=1.040, grad_norm=136.046, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=8.921
+[gpub001:0/64] 2023-07-14 16:30:13,954 (trainer:732) INFO: 49epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=71.267, loss_att=55.700, acc=0.705, loss=60.370, backward_time=1.026, grad_norm=123.436, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.125e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 16:32:30,026 (trainer:732) INFO: 49epoch:train:6001-6100batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=70.944, loss_att=50.849, acc=0.722, loss=56.877, backward_time=1.022, grad_norm=205.768, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 16:34:45,468 (trainer:732) INFO: 49epoch:train:6101-6200batch: iter_time=1.363e-04, forward_time=0.144, loss_ctc=80.848, loss_att=65.774, acc=0.690, loss=70.296, backward_time=1.024, grad_norm=189.035, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 16:37:00,897 (trainer:732) INFO: 49epoch:train:6201-6300batch: iter_time=1.565e-04, forward_time=0.144, loss_ctc=68.282, loss_att=48.857, acc=0.734, loss=54.685, backward_time=1.025, grad_norm=112.677, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 16:39:16,422 (trainer:732) INFO: 49epoch:train:6301-6400batch: iter_time=1.438e-04, forward_time=0.144, loss_ctc=69.857, loss_att=51.306, acc=0.719, loss=56.871, backward_time=1.024, grad_norm=136.182, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 16:41:31,597 (trainer:732) INFO: 49epoch:train:6401-6500batch: iter_time=1.378e-04, forward_time=0.144, loss_ctc=62.547, loss_att=44.512, acc=0.727, loss=49.923, backward_time=1.022, grad_norm=115.827, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.122e-05, train_time=2.703
+[gpub001:0/64] 2023-07-14 16:43:46,948 (trainer:732) INFO: 49epoch:train:6501-6600batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=68.154, loss_att=48.823, acc=0.723, loss=54.622, backward_time=1.024, grad_norm=127.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.122e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 16:45:29,079 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-14 16:45:47,237 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-14 16:45:50,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:49:39,620 (trainer:732) INFO: 49epoch:train:6601-6700batch: iter_time=2.079, forward_time=0.183, loss_ctc=76.672, loss_att=56.369, acc=0.715, loss=62.460, backward_time=1.034, grad_norm=132.969, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.121e-05, train_time=7.053
+[gpub001:0/64] 2023-07-14 16:51:56,314 (trainer:732) INFO: 49epoch:train:6701-6800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=70.489, loss_att=56.214, acc=0.716, loss=60.496, backward_time=1.027, grad_norm=152.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.121e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 16:54:12,308 (trainer:732) INFO: 49epoch:train:6801-6900batch: iter_time=1.133e-04, forward_time=0.143, loss_ctc=71.773, loss_att=49.137, acc=0.730, loss=55.928, backward_time=1.027, grad_norm=116.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 16:56:28,064 (trainer:732) INFO: 49epoch:train:6901-7000batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=76.239, loss_att=59.077, acc=0.716, loss=64.225, backward_time=1.026, grad_norm=122.696, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 16:58:43,796 (trainer:732) INFO: 49epoch:train:7001-7100batch: iter_time=1.142e-04, forward_time=0.144, loss_ctc=73.758, loss_att=58.472, acc=0.731, loss=63.058, backward_time=1.026, grad_norm=119.532, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:00:59,084 (trainer:732) INFO: 49epoch:train:7101-7200batch: iter_time=1.003e-04, forward_time=0.142, loss_ctc=67.069, loss_att=48.465, acc=0.731, loss=54.046, backward_time=1.023, grad_norm=124.919, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:03:14,636 (trainer:732) INFO: 49epoch:train:7201-7300batch: iter_time=9.950e-05, forward_time=0.144, loss_ctc=63.394, loss_att=43.420, acc=0.735, loss=49.412, backward_time=1.024, grad_norm=118.629, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.118e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 17:05:30,294 (trainer:732) INFO: 49epoch:train:7301-7400batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=70.573, loss_att=52.037, acc=0.731, loss=57.598, backward_time=1.025, grad_norm=138.691, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 17:07:45,765 (trainer:732) INFO: 49epoch:train:7401-7500batch: iter_time=9.254e-05, forward_time=0.144, loss_ctc=67.756, loss_att=48.815, acc=0.728, loss=54.498, backward_time=1.025, grad_norm=152.362, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:07:47,423 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-14 17:08:05,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 17:08:09,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:15:00,221 (trainer:732) INFO: 49epoch:train:7501-7600batch: iter_time=1.254, forward_time=0.144, loss_ctc=73.840, loss_att=54.828, acc=0.713, loss=60.532, backward_time=1.035, grad_norm=126.378, clip=100.000, loss_scale=2.434e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=8.689
+[gpub001:0/64] 2023-07-14 17:17:16,603 (trainer:732) INFO: 49epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=73.851, loss_att=54.824, acc=0.712, loss=60.532, backward_time=1.027, grad_norm=149.921, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=2.727
+[gpub001:0/64] 2023-07-14 17:19:32,240 (trainer:732) INFO: 49epoch:train:7701-7800batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=69.949, loss_att=52.432, acc=0.715, loss=57.687, backward_time=1.024, grad_norm=117.932, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 17:21:47,986 (trainer:732) INFO: 49epoch:train:7801-7900batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=80.270, loss_att=64.561, acc=0.698, loss=69.274, backward_time=1.027, grad_norm=140.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 17:24:03,451 (trainer:732) INFO: 49epoch:train:7901-8000batch: iter_time=1.315e-04, forward_time=0.145, loss_ctc=65.982, loss_att=48.615, acc=0.731, loss=53.825, backward_time=1.024, grad_norm=143.731, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:26:18,815 (trainer:732) INFO: 49epoch:train:8001-8100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=63.870, loss_att=44.066, acc=0.731, loss=50.007, backward_time=1.023, grad_norm=124.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 17:28:34,027 (trainer:732) INFO: 49epoch:train:8101-8200batch: iter_time=1.181e-04, forward_time=0.144, loss_ctc=68.490, loss_att=50.287, acc=0.723, loss=55.748, backward_time=1.021, grad_norm=116.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.704
+[gpub001:0/64] 2023-07-14 17:30:49,344 (trainer:732) INFO: 49epoch:train:8201-8300batch: iter_time=1.236e-04, forward_time=0.145, loss_ctc=60.268, loss_att=42.861, acc=0.725, loss=48.083, backward_time=1.021, grad_norm=105.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:31:35,992 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-14 17:31:53,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 17:31:57,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:38:13,424 (trainer:732) INFO: 49epoch:train:8301-8400batch: iter_time=1.214, forward_time=0.154, loss_ctc=81.413, loss_att=59.604, acc=0.719, loss=66.147, backward_time=1.043, grad_norm=156.011, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=8.881
+[gpub001:0/64] 2023-07-14 17:40:30,027 (trainer:732) INFO: 49epoch:train:8401-8500batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=72.136, loss_att=55.373, acc=0.718, loss=60.402, backward_time=1.026, grad_norm=133.755, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:42:45,754 (trainer:732) INFO: 49epoch:train:8501-8600batch: iter_time=1.202e-04, forward_time=0.145, loss_ctc=70.343, loss_att=50.701, acc=0.727, loss=56.593, backward_time=1.028, grad_norm=121.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:45:01,816 (trainer:732) INFO: 49epoch:train:8601-8700batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=80.448, loss_att=64.353, acc=0.708, loss=69.182, backward_time=1.028, grad_norm=135.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 17:47:20,645 (trainer:732) INFO: 49epoch:train:8701-8800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.178, loss_att=48.931, acc=0.743, loss=54.405, backward_time=1.028, grad_norm=142.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.110e-05, train_time=2.776
+[gpub001:0/64] 2023-07-14 17:49:37,238 (trainer:732) INFO: 49epoch:train:8801-8900batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=68.598, loss_att=50.099, acc=0.731, loss=55.649, backward_time=1.027, grad_norm=124.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:51:56,378 (trainer:732) INFO: 49epoch:train:8901-9000batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=62.040, loss_att=43.423, acc=0.740, loss=49.008, backward_time=1.025, grad_norm=114.553, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.783
+[gpub001:0/64] 2023-07-14 17:54:11,781 (trainer:732) INFO: 49epoch:train:9001-9100batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=67.395, loss_att=49.894, acc=0.729, loss=55.144, backward_time=1.022, grad_norm=128.721, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.108e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 17:56:00,273 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-14 17:56:18,139 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 17:56:21,615 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:01:26,368 (trainer:732) INFO: 49epoch:train:9101-9200batch: iter_time=1.617, forward_time=0.164, loss_ctc=69.323, loss_att=50.650, acc=0.726, loss=56.252, backward_time=1.035, grad_norm=140.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.108e-05, train_time=8.692
+[gpub001:0/64] 2023-07-14 18:03:44,759 (trainer:732) INFO: 49epoch:train:9201-9300batch: iter_time=1.228e-04, forward_time=0.147, loss_ctc=73.686, loss_att=57.660, acc=0.717, loss=62.468, backward_time=1.032, grad_norm=120.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.768
+[gpub001:0/64] 2023-07-14 18:06:01,753 (trainer:732) INFO: 49epoch:train:9301-9400batch: iter_time=1.486e-04, forward_time=0.145, loss_ctc=71.589, loss_att=48.903, acc=0.728, loss=55.709, backward_time=1.025, grad_norm=119.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 18:08:19,922 (trainer:732) INFO: 49epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=80.193, loss_att=65.513, acc=0.702, loss=69.917, backward_time=1.031, grad_norm=140.511, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.763
+[gpub001:0/64] 2023-07-14 18:10:37,699 (trainer:732) INFO: 49epoch:train:9501-9600batch: iter_time=1.212e-04, forward_time=0.144, loss_ctc=67.328, loss_att=48.700, acc=0.744, loss=54.289, backward_time=1.027, grad_norm=128.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.755
+[gpub001:0/64] 2023-07-14 18:12:55,391 (trainer:732) INFO: 49epoch:train:9601-9700batch: iter_time=1.316e-04, forward_time=0.143, loss_ctc=65.278, loss_att=46.806, acc=0.726, loss=52.347, backward_time=1.026, grad_norm=135.251, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.105e-05, train_time=2.754
+[gpub001:0/64] 2023-07-14 18:15:15,142 (trainer:732) INFO: 49epoch:train:9701-9800batch: iter_time=1.367e-04, forward_time=0.145, loss_ctc=65.510, loss_att=45.641, acc=0.741, loss=51.602, backward_time=1.034, grad_norm=115.004, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.105e-05, train_time=2.795
+[gpub001:0/64] 2023-07-14 18:17:33,807 (trainer:732) INFO: 49epoch:train:9801-9900batch: iter_time=1.295e-04, forward_time=0.144, loss_ctc=64.369, loss_att=47.790, acc=0.723, loss=52.764, backward_time=1.027, grad_norm=108.677, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.773
+[gpub001:0/64] 2023-07-14 18:19:49,329 (trainer:732) INFO: 49epoch:train:9901-10000batch: iter_time=1.004e-04, forward_time=0.144, loss_ctc=76.087, loss_att=54.280, acc=0.724, loss=60.822, backward_time=1.024, grad_norm=120.371, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 18:33:02,493 (trainer:338) INFO: 49epoch results: [train] iter_time=0.177, forward_time=0.146, loss_ctc=71.210, loss_att=52.672, acc=0.719, loss=58.234, backward_time=1.030, grad_norm=131.679, clip=100.000, loss_scale=2.702e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=3.462, time=4 hours, 48 minutes and 48.83 seconds, total_count=460000, gpu_max_cached_mem_GB=34.336, [valid] loss_ctc=43.418, cer_ctc=0.254, loss_att=37.707, acc=0.674, cer=0.423, wer=0.998, loss=39.421, time=7 minutes and 3.51 seconds, total_count=47058, gpu_max_cached_mem_GB=37.631, [att_plot] time=5 minutes and 53.56 seconds, total_count=0, gpu_max_cached_mem_GB=37.631
+[gpub001:0/64] 2023-07-14 18:33:18,474 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-14 18:33:18,484 (trainer:272) INFO: 50/60epoch started. Estimated time to finish: 2 days, 7 hours and 22 minutes
+[gpub001:0/64] 2023-07-14 18:33:18,487 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 18:33:35,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-14 18:33:39,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:37:48,677 (trainer:732) INFO: 50epoch:train:1-100batch: iter_time=1.165, forward_time=0.188, loss_ctc=76.860, loss_att=56.022, acc=0.704, loss=62.273, backward_time=1.063, grad_norm=182.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.103e-05, train_time=5.403
+[gpub001:0/64] 2023-07-14 18:40:12,390 (trainer:732) INFO: 50epoch:train:101-200batch: iter_time=9.433e-05, forward_time=0.179, loss_ctc=63.633, loss_att=45.042, acc=0.731, loss=50.619, backward_time=1.036, grad_norm=138.710, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.103e-05, train_time=2.872
+[gpub001:0/64] 2023-07-14 18:42:28,865 (trainer:732) INFO: 50epoch:train:201-300batch: iter_time=9.529e-05, forward_time=0.145, loss_ctc=66.674, loss_att=52.287, acc=0.720, loss=56.603, backward_time=1.031, grad_norm=131.160, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.102e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 18:44:55,125 (trainer:732) INFO: 50epoch:train:301-400batch: iter_time=9.373e-05, forward_time=0.144, loss_ctc=70.708, loss_att=47.914, acc=0.725, loss=54.752, backward_time=1.038, grad_norm=128.284, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.925
+[gpub001:0/64] 2023-07-14 18:47:16,114 (trainer:732) INFO: 50epoch:train:401-500batch: iter_time=9.543e-05, forward_time=0.145, loss_ctc=76.756, loss_att=56.886, acc=0.714, loss=62.847, backward_time=1.034, grad_norm=131.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.820
+[gpub001:0/64] 2023-07-14 18:49:35,575 (trainer:732) INFO: 50epoch:train:501-600batch: iter_time=9.838e-05, forward_time=0.144, loss_ctc=66.678, loss_att=50.737, acc=0.720, loss=55.519, backward_time=1.034, grad_norm=115.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.789
+[gpub001:0/64] 2023-07-14 18:52:04,139 (trainer:732) INFO: 50epoch:train:601-700batch: iter_time=9.636e-05, forward_time=0.157, loss_ctc=66.833, loss_att=48.220, acc=0.716, loss=53.804, backward_time=1.050, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.971
+[gpub001:0/64] 2023-07-14 18:54:29,918 (trainer:732) INFO: 50epoch:train:701-800batch: iter_time=9.484e-05, forward_time=0.144, loss_ctc=71.313, loss_att=46.886, acc=0.720, loss=54.214, backward_time=1.043, grad_norm=121.906, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.099e-05, train_time=2.915
+[gpub001:0/64] 2023-07-14 18:55:23,066 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-14 18:55:40,513 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 18:55:43,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:00:29,306 (trainer:732) INFO: 50epoch:train:801-900batch: iter_time=1.702, forward_time=0.164, loss_ctc=74.067, loss_att=54.822, acc=0.707, loss=60.595, backward_time=1.045, grad_norm=162.733, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.099e-05, train_time=7.188
+[gpub001:0/64] 2023-07-14 19:02:46,018 (trainer:732) INFO: 50epoch:train:901-1000batch: iter_time=1.192e-04, forward_time=0.144, loss_ctc=63.885, loss_att=44.598, acc=0.731, loss=50.384, backward_time=1.029, grad_norm=110.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 19:05:02,005 (trainer:732) INFO: 50epoch:train:1001-1100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=65.165, loss_att=50.965, acc=0.731, loss=55.225, backward_time=1.029, grad_norm=115.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 19:07:17,905 (trainer:732) INFO: 50epoch:train:1101-1200batch: iter_time=1.223e-04, forward_time=0.144, loss_ctc=72.477, loss_att=48.920, acc=0.727, loss=55.987, backward_time=1.029, grad_norm=135.909, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:09:33,663 (trainer:732) INFO: 50epoch:train:1201-1300batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=76.221, loss_att=57.505, acc=0.709, loss=63.120, backward_time=1.029, grad_norm=152.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:11:52,890 (trainer:732) INFO: 50epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=64.913, loss_att=49.074, acc=0.717, loss=53.826, backward_time=1.032, grad_norm=143.678, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.784
+[gpub001:0/64] 2023-07-14 19:14:08,777 (trainer:732) INFO: 50epoch:train:1401-1500batch: iter_time=1.097e-04, forward_time=0.145, loss_ctc=64.207, loss_att=46.300, acc=0.727, loss=51.672, backward_time=1.028, grad_norm=96.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:16:24,490 (trainer:732) INFO: 50epoch:train:1501-1600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=72.579, loss_att=48.346, acc=0.712, loss=55.616, backward_time=1.026, grad_norm=117.818, clip=100.000, loss_scale=4.868e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 19:18:06,491 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-14 19:18:24,593 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 19:18:28,029 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:21:27,486 (trainer:732) INFO: 50epoch:train:1601-1700batch: iter_time=1.532, forward_time=0.145, loss_ctc=76.069, loss_att=57.854, acc=0.715, loss=63.318, backward_time=1.042, grad_norm=120.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=6.060
+[gpub001:0/64] 2023-07-14 19:23:44,660 (trainer:732) INFO: 50epoch:train:1701-1800batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=69.245, loss_att=50.311, acc=0.700, loss=55.991, backward_time=1.031, grad_norm=146.320, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.743
+[gpub001:0/64] 2023-07-14 19:26:00,439 (trainer:732) INFO: 50epoch:train:1801-1900batch: iter_time=1.160e-04, forward_time=0.144, loss_ctc=65.293, loss_att=47.043, acc=0.727, loss=52.518, backward_time=1.025, grad_norm=118.987, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:28:15,850 (trainer:732) INFO: 50epoch:train:1901-2000batch: iter_time=1.534e-04, forward_time=0.146, loss_ctc=64.296, loss_att=50.158, acc=0.720, loss=54.399, backward_time=1.025, grad_norm=125.395, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.093e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:30:31,788 (trainer:732) INFO: 50epoch:train:2001-2100batch: iter_time=1.528e-04, forward_time=0.147, loss_ctc=72.538, loss_att=52.328, acc=0.704, loss=58.391, backward_time=1.030, grad_norm=133.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:32:47,884 (trainer:732) INFO: 50epoch:train:2101-2200batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=73.571, loss_att=55.147, acc=0.713, loss=60.674, backward_time=1.031, grad_norm=116.254, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.722
+[gpub001:0/64] 2023-07-14 19:35:03,746 (trainer:732) INFO: 50epoch:train:2201-2300batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=66.832, loss_att=48.835, acc=0.707, loss=54.234, backward_time=1.029, grad_norm=126.747, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 19:37:19,158 (trainer:732) INFO: 50epoch:train:2301-2400batch: iter_time=1.330e-04, forward_time=0.146, loss_ctc=69.922, loss_att=45.724, acc=0.719, loss=52.983, backward_time=1.027, grad_norm=121.655, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:39:35,125 (trainer:732) INFO: 50epoch:train:2401-2500batch: iter_time=1.500e-04, forward_time=0.147, loss_ctc=69.057, loss_att=51.498, acc=0.713, loss=56.766, backward_time=1.030, grad_norm=136.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:39:36,493 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 19:39:54,855 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 19:39:58,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:47:03,128 (trainer:732) INFO: 50epoch:train:2501-2600batch: iter_time=1.229, forward_time=0.146, loss_ctc=75.319, loss_att=54.368, acc=0.702, loss=60.653, backward_time=1.044, grad_norm=145.226, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=8.960
+[gpub001:0/64] 2023-07-14 19:49:19,629 (trainer:732) INFO: 50epoch:train:2601-2700batch: iter_time=1.033e-04, forward_time=0.147, loss_ctc=62.652, loss_att=44.316, acc=0.729, loss=49.817, backward_time=1.029, grad_norm=163.995, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 19:51:35,460 (trainer:732) INFO: 50epoch:train:2701-2800batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=65.546, loss_att=51.752, acc=0.720, loss=55.890, backward_time=1.028, grad_norm=116.187, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:53:51,137 (trainer:732) INFO: 50epoch:train:2801-2900batch: iter_time=1.003e-04, forward_time=0.145, loss_ctc=69.677, loss_att=46.545, acc=0.724, loss=53.485, backward_time=1.027, grad_norm=151.273, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 19:55:55,895 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 19:56:06,946 (trainer:732) INFO: 50epoch:train:2901-3000batch: iter_time=1.015e-04, forward_time=0.145, loss_ctc=75.469, loss_att=56.637, acc=0.712, loss=62.287, backward_time=1.028, grad_norm=123.542, clip=100.000, loss_scale=6.225e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:58:23,795 (trainer:732) INFO: 50epoch:train:3001-3100batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=64.660, loss_att=50.088, acc=0.711, loss=54.459, backward_time=1.029, grad_norm=139.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:00:39,421 (trainer:732) INFO: 50epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.144, loss_ctc=65.956, loss_att=46.842, acc=0.716, loss=52.576, backward_time=1.026, grad_norm=112.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 20:02:55,138 (trainer:732) INFO: 50epoch:train:3201-3300batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=69.096, loss_att=46.225, acc=0.715, loss=53.086, backward_time=1.028, grad_norm=123.518, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.086e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 20:03:41,171 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 20:03:59,687 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 20:04:03,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:10:00,535 (trainer:732) INFO: 50epoch:train:3301-3400batch: iter_time=1.229, forward_time=0.207, loss_ctc=76.427, loss_att=55.751, acc=0.711, loss=61.954, backward_time=1.042, grad_norm=113.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.086e-05, train_time=8.507
+[gpub001:0/64] 2023-07-14 20:12:16,912 (trainer:732) INFO: 50epoch:train:3401-3500batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=67.057, loss_att=48.763, acc=0.708, loss=54.251, backward_time=1.030, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.728
+[gpub001:0/64] 2023-07-14 20:14:32,971 (trainer:732) INFO: 50epoch:train:3501-3600batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=67.118, loss_att=49.572, acc=0.728, loss=54.835, backward_time=1.028, grad_norm=137.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 20:16:49,816 (trainer:732) INFO: 50epoch:train:3601-3700batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=65.072, loss_att=47.251, acc=0.722, loss=52.598, backward_time=1.026, grad_norm=124.680, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.084e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:19:06,520 (trainer:732) INFO: 50epoch:train:3701-3800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=68.645, loss_att=49.960, acc=0.716, loss=55.566, backward_time=1.026, grad_norm=119.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.084e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 20:21:22,393 (trainer:732) INFO: 50epoch:train:3801-3900batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=71.225, loss_att=52.976, acc=0.716, loss=58.451, backward_time=1.027, grad_norm=125.839, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.083e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:23:38,267 (trainer:732) INFO: 50epoch:train:3901-4000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=67.296, loss_att=49.609, acc=0.708, loss=54.915, backward_time=1.027, grad_norm=122.521, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:25:53,904 (trainer:732) INFO: 50epoch:train:4001-4100batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=68.722, loss_att=44.612, acc=0.725, loss=51.845, backward_time=1.026, grad_norm=133.861, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 20:27:25,140 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 20:27:42,919 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 20:27:46,376 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:31:59,928 (trainer:732) INFO: 50epoch:train:4101-4200batch: iter_time=1.266, forward_time=0.158, loss_ctc=69.892, loss_att=54.719, acc=0.724, loss=59.271, backward_time=1.041, grad_norm=120.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=7.318
+[gpub001:0/64] 2023-07-14 20:34:16,845 (trainer:732) INFO: 50epoch:train:4201-4300batch: iter_time=1.347e-04, forward_time=0.148, loss_ctc=72.245, loss_att=50.876, acc=0.714, loss=57.287, backward_time=1.033, grad_norm=144.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 20:36:33,354 (trainer:732) INFO: 50epoch:train:4301-4400batch: iter_time=1.372e-04, forward_time=0.145, loss_ctc=64.193, loss_att=45.992, acc=0.740, loss=51.452, backward_time=1.028, grad_norm=136.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 20:38:49,363 (trainer:732) INFO: 50epoch:train:4401-4500batch: iter_time=1.302e-04, forward_time=0.145, loss_ctc=64.055, loss_att=49.713, acc=0.733, loss=54.015, backward_time=1.029, grad_norm=152.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:41:05,362 (trainer:732) INFO: 50epoch:train:4501-4600batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=71.594, loss_att=49.931, acc=0.723, loss=56.430, backward_time=1.030, grad_norm=141.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:43:21,381 (trainer:732) INFO: 50epoch:train:4601-4700batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.948, loss_att=55.695, acc=0.725, loss=60.871, backward_time=1.029, grad_norm=108.629, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:45:37,140 (trainer:732) INFO: 50epoch:train:4701-4800batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=65.934, loss_att=47.948, acc=0.724, loss=53.344, backward_time=1.028, grad_norm=152.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 20:47:52,965 (trainer:732) INFO: 50epoch:train:4801-4900batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.950, loss_att=45.382, acc=0.730, loss=52.453, backward_time=1.029, grad_norm=130.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 20:50:08,537 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 20:50:26,538 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:50:30,045 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:50:30,046 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 20:50:30,052 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:54:56,835 (trainer:732) INFO: 50epoch:train:4901-5000batch: iter_time=1.261, forward_time=0.166, loss_ctc=68.113, loss_att=51.907, acc=0.712, loss=56.769, backward_time=1.030, grad_norm=123.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=8.477
+[gpub001:0/64] 2023-07-14 20:57:14,888 (trainer:732) INFO: 50epoch:train:5001-5100batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=74.370, loss_att=53.834, acc=0.714, loss=59.995, backward_time=1.037, grad_norm=133.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 20:59:30,447 (trainer:732) INFO: 50epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=63.418, loss_att=44.065, acc=0.743, loss=49.871, backward_time=1.025, grad_norm=118.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 21:01:46,392 (trainer:732) INFO: 50epoch:train:5201-5300batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.343, loss_att=50.622, acc=0.729, loss=54.738, backward_time=1.029, grad_norm=107.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 21:04:02,266 (trainer:732) INFO: 50epoch:train:5301-5400batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=67.130, loss_att=45.243, acc=0.738, loss=51.809, backward_time=1.027, grad_norm=126.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 21:06:18,342 (trainer:732) INFO: 50epoch:train:5401-5500batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=74.073, loss_att=55.688, acc=0.724, loss=61.203, backward_time=1.029, grad_norm=120.175, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 21:08:34,524 (trainer:732) INFO: 50epoch:train:5501-5600batch: iter_time=1.200e-04, forward_time=0.148, loss_ctc=64.861, loss_att=50.199, acc=0.726, loss=54.598, backward_time=1.029, grad_norm=114.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 21:10:57,350 (trainer:732) INFO: 50epoch:train:5601-5700batch: iter_time=1.141e-04, forward_time=0.146, loss_ctc=63.074, loss_att=45.439, acc=0.726, loss=50.730, backward_time=1.034, grad_norm=110.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.856
+[gpub001:0/64] 2023-07-14 21:13:13,082 (trainer:732) INFO: 50epoch:train:5701-5800batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=69.031, loss_att=46.799, acc=0.723, loss=53.469, backward_time=1.027, grad_norm=143.702, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.073e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:14:11,324 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 21:14:29,273 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub001:0/64] 2023-07-14 21:14:32,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 21:19:03,310 (trainer:732) INFO: 50epoch:train:5801-5900batch: iter_time=2.002, forward_time=0.183, loss_ctc=73.655, loss_att=53.932, acc=0.722, loss=59.849, backward_time=1.055, grad_norm=115.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.073e-05, train_time=7.004 +[gpub001:0/64] 2023-07-14 21:21:32,431 (trainer:732) INFO: 50epoch:train:5901-6000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=66.964, loss_att=50.032, acc=0.725, loss=55.112, backward_time=1.037, grad_norm=127.025, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.072e-05, train_time=2.983 +[gpub001:0/64] 2023-07-14 21:24:07,379 (trainer:732) INFO: 50epoch:train:6001-6100batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=66.762, loss_att=49.729, acc=0.735, loss=54.839, backward_time=1.080, grad_norm=115.938, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=3.099 +[gpub001:0/64] 2023-07-14 21:26:28,272 (trainer:732) INFO: 50epoch:train:6101-6200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=65.838, loss_att=47.261, acc=0.729, loss=52.834, backward_time=1.034, grad_norm=114.927, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=2.818 +[gpub001:0/64] 2023-07-14 21:28:48,811 (trainer:732) INFO: 50epoch:train:6201-6300batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=68.647, loss_att=49.152, acc=0.730, loss=55.000, backward_time=1.037, grad_norm=128.845, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.811 +[gpub001:0/64] 2023-07-14 21:30:05,936 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-14 21:31:12,672 (trainer:732) INFO: 50epoch:train:6301-6400batch: iter_time=1.053e-04, forward_time=0.147, loss_ctc=69.972, loss_att=51.775, acc=0.730, loss=57.234, backward_time=1.035, grad_norm=136.068, clip=100.000, loss_scale=2.484e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.877 +[gpub001:0/64] 2023-07-14 21:33:31,183 (trainer:732) INFO: 50epoch:train:6401-6500batch: iter_time=1.154e-04, forward_time=0.146, loss_ctc=66.029, loss_att=48.270, acc=0.719, loss=53.598, backward_time=1.031, grad_norm=131.463, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.770 +[gpub001:0/64] 2023-07-14 21:35:47,199 (trainer:732) INFO: 50epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=68.099, loss_att=44.525, acc=0.732, loss=51.598, backward_time=1.027, grad_norm=116.086, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.720 +[gpub001:0/64] 2023-07-14 21:37:20,083 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub001:0/64] 2023-07-14 21:37:38,383 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-14 21:37:41,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 21:41:34,057 (trainer:732) INFO: 50epoch:train:6601-6700batch: iter_time=1.243, forward_time=0.149, loss_ctc=75.228, loss_att=56.364, acc=0.705, loss=62.024, backward_time=1.043, grad_norm=131.714, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=6.937 +[gpub001:0/64] 2023-07-14 21:43:51,597 (trainer:732) INFO: 50epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=62.359, loss_att=46.440, acc=0.717, loss=51.216, backward_time=1.031, grad_norm=126.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=2.751 +[gpub001:0/64] 2023-07-14 21:46:07,899 (trainer:732) INFO: 50epoch:train:6801-6900batch: iter_time=1.299e-04, forward_time=0.146, loss_ctc=65.735, loss_att=48.063, acc=0.725, loss=53.365, backward_time=1.029, grad_norm=134.386, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 21:48:23,594 (trainer:732) INFO: 50epoch:train:6901-7000batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=68.323, loss_att=49.027, acc=0.722, loss=54.815, backward_time=1.026, grad_norm=130.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.714 +[gpub001:0/64] 2023-07-14 21:50:39,377 (trainer:732) INFO: 50epoch:train:7001-7100batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=70.496, loss_att=52.439, acc=0.716, loss=57.856, 
backward_time=1.029, grad_norm=129.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.715 +[gpub001:0/64] 2023-07-14 21:52:55,578 (trainer:732) INFO: 50epoch:train:7101-7200batch: iter_time=1.273e-04, forward_time=0.147, loss_ctc=67.037, loss_att=51.153, acc=0.710, loss=55.918, backward_time=1.031, grad_norm=135.297, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.724 +[gpub001:0/64] 2023-07-14 21:55:11,269 (trainer:732) INFO: 50epoch:train:7201-7300batch: iter_time=1.392e-04, forward_time=0.146, loss_ctc=64.625, loss_att=47.415, acc=0.716, loss=52.578, backward_time=1.028, grad_norm=138.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.714 +[gpub001:0/64] 2023-07-14 21:57:27,032 (trainer:732) INFO: 50epoch:train:7301-7400batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=67.172, loss_att=45.670, acc=0.720, loss=52.120, backward_time=1.027, grad_norm=138.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.715 +[gpub001:0/64] 2023-07-14 21:59:42,685 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-14 22:00:00,870 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-14 22:00:04,274 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 22:04:14,311 (trainer:732) INFO: 50epoch:train:7401-7500batch: iter_time=1.298, forward_time=0.174, loss_ctc=72.785, loss_att=54.934, acc=0.706, loss=60.289, backward_time=1.034, grad_norm=127.076, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.064e-05, train_time=8.145 +[gpub001:0/64] 2023-07-14 22:06:32,396 (trainer:732) INFO: 50epoch:train:7501-7600batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=75.267, loss_att=55.281, acc=0.706, loss=61.277, backward_time=1.033, grad_norm=127.032, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.064e-05, train_time=2.761 +[gpub001:0/64] 2023-07-14 22:08:49,298 (trainer:732) INFO: 50epoch:train:7601-7700batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=61.713, loss_att=43.430, acc=0.731, loss=48.915, backward_time=1.029, grad_norm=113.236, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.738 +[gpub001:0/64] 2023-07-14 22:11:05,012 (trainer:732) INFO: 50epoch:train:7701-7800batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=64.435, loss_att=51.314, acc=0.720, loss=55.251, backward_time=1.025, grad_norm=137.487, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.714 +[gpub001:0/64] 2023-07-14 22:13:20,529 (trainer:732) INFO: 
50epoch:train:7801-7900batch: iter_time=1.071e-04, forward_time=0.145, loss_ctc=67.719, loss_att=45.921, acc=0.729, loss=52.460, backward_time=1.026, grad_norm=116.618, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.710 +[gpub001:0/64] 2023-07-14 22:15:36,651 (trainer:732) INFO: 50epoch:train:7901-8000batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=74.668, loss_att=55.808, acc=0.718, loss=61.466, backward_time=1.030, grad_norm=138.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.722 +[gpub001:0/64] 2023-07-14 22:17:52,426 (trainer:732) INFO: 50epoch:train:8001-8100batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=64.712, loss_att=49.154, acc=0.717, loss=53.822, backward_time=1.027, grad_norm=134.088, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.715 +[gpub001:0/64] 2023-07-14 22:20:08,073 (trainer:732) INFO: 50epoch:train:8101-8200batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=62.415, loss_att=45.458, acc=0.725, loss=50.545, backward_time=1.026, grad_norm=104.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.713 +[gpub001:0/64] 2023-07-14 22:22:24,409 (trainer:732) INFO: 50epoch:train:8201-8300batch: iter_time=1.217e-04, forward_time=0.144, loss_ctc=69.803, loss_att=47.319, acc=0.716, loss=54.064, backward_time=1.024, grad_norm=120.199, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.060e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 22:23:27,929 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-14 22:23:46,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-14 22:23:49,971 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 22:29:23,107 (trainer:732) INFO: 50epoch:train:8301-8400batch: iter_time=1.937, forward_time=0.171, loss_ctc=73.351, loss_att=54.696, acc=0.704, loss=60.292, backward_time=1.042, grad_norm=126.226, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.060e-05, train_time=8.373 +[gpub001:0/64] 2023-07-14 22:31:39,375 (trainer:732) INFO: 50epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=65.360, loss_att=47.307, acc=0.724, loss=52.723, backward_time=1.028, grad_norm=106.196, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.059e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 22:33:56,198 (trainer:732) INFO: 50epoch:train:8501-8600batch: iter_time=1.121e-04, forward_time=0.145, loss_ctc=62.356, loss_att=47.082, acc=0.731, loss=51.664, backward_time=1.028, grad_norm=116.896, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.736 +[gpub001:0/64] 2023-07-14 22:36:12,773 (trainer:732) INFO: 50epoch:train:8601-8700batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.928, loss_att=48.566, acc=0.721, loss=54.975, backward_time=1.028, grad_norm=123.106, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.731 +[gpub001:0/64] 2023-07-14 22:38:28,734 (trainer:732) INFO: 50epoch:train:8701-8800batch: iter_time=1.093e-04, forward_time=0.145, loss_ctc=73.339, loss_att=55.181, acc=0.711, loss=60.628, backward_time=1.027, grad_norm=118.978, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.057e-05, train_time=2.719 +[gpub001:0/64] 2023-07-14 22:40:44,531 (trainer:732) INFO: 50epoch:train:8801-8900batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=63.033, loss_att=48.849, acc=0.707, loss=53.104, backward_time=1.028, grad_norm=132.965, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.057e-05, train_time=2.716 +[gpub001:0/64] 2023-07-14 22:42:59,903 (trainer:732) INFO: 50epoch:train:8901-9000batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=64.413, loss_att=46.307, acc=0.729, loss=51.739, backward_time=1.026, grad_norm=96.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.707 +[gpub001:0/64] 2023-07-14 22:45:18,107 (trainer:732) INFO: 50epoch:train:9001-9100batch: iter_time=1.165e-04, forward_time=0.147, loss_ctc=69.114, loss_att=45.854, acc=0.719, loss=52.832, backward_time=1.032, grad_norm=140.806, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.764 +[gpub001:0/64] 2023-07-14 22:46:49,602 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub001:0/64] 2023-07-14 22:47:07,559 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-14 22:47:11,050 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 22:52:32,878 (trainer:732) INFO: 50epoch:train:9101-9200batch: iter_time=2.016, forward_time=0.145, loss_ctc=79.028, loss_att=57.548, acc=0.699, loss=63.992, backward_time=1.040, grad_norm=136.426, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.055e-05, train_time=8.695 +[gpub001:0/64] 2023-07-14 22:54:49,816 (trainer:732) INFO: 50epoch:train:9201-9300batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=62.727, loss_att=47.586, acc=0.737, loss=52.129, backward_time=1.032, grad_norm=123.258, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.055e-05, train_time=2.739 +[gpub001:0/64] 2023-07-14 22:57:06,559 (trainer:732) INFO: 50epoch:train:9301-9400batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=64.256, loss_att=48.937, acc=0.734, loss=53.533, backward_time=1.029, grad_norm=116.732, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.735 +[gpub001:0/64] 2023-07-14 22:59:22,627 (trainer:732) INFO: 50epoch:train:9401-9500batch: iter_time=1.020e-04, forward_time=0.144, loss_ctc=68.645, loss_att=49.152, acc=0.732, loss=55.000, backward_time=1.026, grad_norm=126.326, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.721 +[gpub001:0/64] 2023-07-14 23:01:39,743 (trainer:732) INFO: 50epoch:train:9501-9600batch: iter_time=1.028e-04, forward_time=0.146, loss_ctc=71.460, loss_att=50.831, acc=0.729, loss=57.020, backward_time=1.030, grad_norm=125.820, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.053e-05, train_time=2.742 +[gpub001:0/64] 2023-07-14 23:03:56,039 (trainer:732) INFO: 50epoch:train:9601-9700batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=65.840, loss_att=52.027, acc=0.718, loss=56.171, backward_time=1.029, grad_norm=137.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.053e-05, train_time=2.726 +[gpub001:0/64] 2023-07-14 23:06:11,688 (trainer:732) INFO: 50epoch:train:9701-9800batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=65.045, loss_att=48.007, acc=0.726, loss=53.118, backward_time=1.026, grad_norm=116.113, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.052e-05, train_time=2.713 +[gpub001:0/64] 2023-07-14 23:08:27,117 (trainer:732) INFO: 50epoch:train:9801-9900batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=69.175, loss_att=45.588, acc=0.730, loss=52.664, backward_time=1.025, grad_norm=139.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.052e-05, train_time=2.708 +[gpub001:0/64] 2023-07-14 23:10:42,703 (trainer:732) INFO: 50epoch:train:9901-10000batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=72.841, loss_att=53.971, acc=0.715, loss=59.632, backward_time=1.025, grad_norm=123.765, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.051e-05, train_time=2.711 +[gpub001:0/64] 2023-07-14 23:24:09,245 (trainer:338) INFO: 50epoch results: [train] iter_time=0.179, forward_time=0.148, loss_ctc=68.583, loss_att=49.901, acc=0.720, loss=55.506, backward_time=1.032, grad_norm=127.995, clip=100.000, loss_scale=3.121e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=3.329, time=4 hours, 37 minutes and 40.49 seconds, total_count=470000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.429, cer_ctc=0.252, loss_att=37.607, acc=0.676, cer=0.417, wer=0.998, loss=39.054, time=7 minutes and 17.28 seconds, total_count=48070, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 52.99 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-14 23:24:24,757 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-14 23:24:24,796 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till50epoch.pth +[gpub001:0/64] 2023-07-14 23:25:13,113 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till50epoch.pth +[gpub001:0/64] 2023-07-14 23:25:53,416 (trainer:272) INFO: 51/60epoch started. Estimated time to finish: 2 days, 1 hour and 33 minutes +[gpub001:0/64] 2023-07-14 23:25:55,441 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/64] 2023-07-14 23:26:15,006 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 23:26:18,855 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 23:26:18,856 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-14 23:26:18,921 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 23:36:35,831 (trainer:732) INFO: 51epoch:train:1-100batch: iter_time=4.981, forward_time=0.181, loss_ctc=71.923, loss_att=55.595, acc=0.703, loss=60.494, backward_time=1.041, grad_norm=115.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.051e-05, train_time=12.824 +[gpub001:0/64] 2023-07-14 23:38:52,739 (trainer:732) INFO: 51epoch:train:101-200batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=79.235, loss_att=65.934, acc=0.698, loss=69.924, backward_time=1.031, grad_norm=121.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.738 +[gpub001:0/64] 2023-07-14 23:41:08,953 (trainer:732) INFO: 51epoch:train:201-300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=69.961, loss_att=49.731, acc=0.710, loss=55.800, backward_time=1.029, grad_norm=132.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.724 +[gpub001:0/64] 2023-07-14 23:43:26,233 (trainer:732) INFO: 51epoch:train:301-400batch: iter_time=1.131e-04, forward_time=0.144, loss_ctc=64.152, loss_att=48.408, acc=0.691, loss=53.132, backward_time=1.026, grad_norm=128.225, clip=100.000, loss_scale=2.369e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.745 +[gpub001:0/64] 2023-07-14 23:45:41,876 (trainer:732) INFO: 51epoch:train:401-500batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=72.180, loss_att=50.535, acc=0.705, loss=57.029, backward_time=1.028, grad_norm=153.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.713 +[gpub001:0/64] 2023-07-14 23:48:00,509 (trainer:732) INFO: 51epoch:train:501-600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=81.916, loss_att=67.146, acc=0.698, loss=71.577, backward_time=1.036, grad_norm=135.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.772 +[gpub001:0/64] 2023-07-14 23:50:17,111 (trainer:732) INFO: 51epoch:train:601-700batch: iter_time=1.289e-04, forward_time=0.145, loss_ctc=62.275, loss_att=44.773, acc=0.726, loss=50.023, backward_time=1.029, grad_norm=104.898, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.732 +[gpub001:0/64] 2023-07-14 23:52:33,852 (trainer:732) INFO: 51epoch:train:701-800batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=74.632, loss_att=56.174, acc=0.688, loss=61.711, backward_time=1.027, grad_norm=120.912, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.047e-05, 
train_time=2.735 +[gpub001:0/64] 2023-07-14 23:53:29,332 (multiple_iter_factory:32) INFO: Building 1st iter-factory... +[gpub001:0/64] 2023-07-14 23:53:47,988 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 23:53:51,397 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 23:53:51,398 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-14 23:53:51,404 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 00:02:44,151 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub001:0/64] 2023-07-15 00:03:11,538 (trainer:732) INFO: 51epoch:train:801-900batch: iter_time=4.917, forward_time=0.176, loss_ctc=75.023, loss_att=61.940, acc=0.702, loss=65.865, backward_time=1.043, grad_norm=135.587, clip=100.000, loss_scale=2.914e+32, optim_step_time=0.183, optim0_lr0=5.047e-05, train_time=12.754 +[gpub001:0/64] 2023-07-15 00:05:28,614 (trainer:732) INFO: 51epoch:train:901-1000batch: iter_time=1.399e-04, forward_time=0.146, loss_ctc=77.022, loss_att=61.388, acc=0.701, loss=66.078, backward_time=1.031, grad_norm=116.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 00:07:45,430 (trainer:732) INFO: 51epoch:train:1001-1100batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=72.448, loss_att=57.061, acc=0.706, loss=61.677, backward_time=1.029, grad_norm=118.194, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 00:10:00,792 (trainer:732) INFO: 51epoch:train:1101-1200batch: iter_time=1.132e-04, forward_time=0.143, loss_ctc=63.641, loss_att=47.528, acc=0.704, loss=52.362, backward_time=1.025, grad_norm=122.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.045e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 00:12:16,409 (trainer:732) INFO: 51epoch:train:1201-1300batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=64.509, loss_att=47.066, acc=0.702, loss=52.299, backward_time=1.025, grad_norm=124.287, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.045e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 00:14:31,911 (trainer:732) INFO: 51epoch:train:1301-1400batch: iter_time=1.155e-04, forward_time=0.143, loss_ctc=81.052, loss_att=63.313, acc=0.697, loss=68.635, backward_time=1.025, grad_norm=161.228, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 00:16:47,763 (trainer:732) INFO: 51epoch:train:1401-1500batch: iter_time=1.052e-04, forward_time=0.144, loss_ctc=72.270, loss_att=53.340, acc=0.717, loss=59.019, backward_time=1.028, grad_norm=137.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 00:19:03,111 (trainer:732) INFO: 
51epoch:train:1501-1600batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=70.162, loss_att=50.618, acc=0.706, loss=56.481, backward_time=1.025, grad_norm=170.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.043e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 00:20:39,237 (multiple_iter_factory:32) INFO: Building 2nd iter-factory... +[gpub001:0/64] 2023-07-15 00:20:57,151 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 00:21:00,582 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 00:24:56,757 (trainer:732) INFO: 51epoch:train:1601-1700batch: iter_time=1.325, forward_time=0.146, loss_ctc=74.706, loss_att=64.628, acc=0.698, loss=67.651, backward_time=1.039, grad_norm=142.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.043e-05, train_time=7.073 +[gpub001:0/64] 2023-07-15 00:27:14,907 (trainer:732) INFO: 51epoch:train:1701-1800batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=75.949, loss_att=63.359, acc=0.712, loss=67.136, backward_time=1.038, grad_norm=127.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.042e-05, train_time=2.763 +[gpub001:0/64] 2023-07-15 00:29:30,975 (trainer:732) INFO: 51epoch:train:1801-1900batch: iter_time=1.016e-04, forward_time=0.146, loss_ctc=69.913, loss_att=50.332, acc=0.719, loss=56.206, backward_time=1.031, grad_norm=110.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.041e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 00:31:46,677 (trainer:732) INFO: 51epoch:train:1901-2000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.394, loss_att=48.413, acc=0.709, loss=53.208, backward_time=1.028, grad_norm=121.998, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.041e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 00:34:02,438 (trainer:732) INFO: 51epoch:train:2001-2100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=66.797, loss_att=48.245, acc=0.712, loss=53.810, backward_time=1.029, grad_norm=124.842, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.040e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 00:36:19,042 (trainer:732) INFO: 51epoch:train:2101-2200batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=83.907, loss_att=64.979, acc=0.707, loss=70.657, backward_time=1.034, grad_norm=153.393, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.040e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 00:38:36,795 (trainer:732) INFO: 51epoch:train:2201-2300batch: iter_time=1.176e-04, forward_time=0.146, loss_ctc=64.525, loss_att=48.786, acc=0.723, loss=53.508, backward_time=1.030, grad_norm=103.031, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.755 +[gpub001:0/64] 2023-07-15 00:40:52,543 (trainer:732) INFO: 51epoch:train:2301-2400batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=72.083, loss_att=51.081, acc=0.715, loss=57.381, backward_time=1.026, grad_norm=151.523, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 00:43:19,481 (trainer:732) INFO: 51epoch:train:2401-2500batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.723, loss_att=56.251, acc=0.711, loss=60.593, backward_time=1.041, grad_norm=144.872, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=2.939 +[gpub001:0/64] 2023-07-15 00:43:21,090 (multiple_iter_factory:32) INFO: Building 3rd iter-factory... +[gpub001:0/64] 2023-07-15 00:43:39,291 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 00:43:42,732 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 00:43:42,733 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub001:0/64] 2023-07-15 00:43:42,739 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 00:49:10,741 (trainer:732) INFO: 51epoch:train:2501-2600batch: iter_time=1.293, forward_time=0.188, loss_ctc=72.326, loss_att=55.154, acc=0.706, loss=60.305, backward_time=1.054, grad_norm=150.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=7.025 +[gpub001:0/64] 2023-07-15 00:51:36,074 (trainer:732) INFO: 51epoch:train:2601-2700batch: iter_time=1.215e-04, forward_time=0.149, loss_ctc=78.155, loss_att=65.045, acc=0.701, loss=68.978, backward_time=1.038, grad_norm=133.517, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=2.907 +[gpub001:0/64] 2023-07-15 00:54:08,484 (trainer:732) INFO: 51epoch:train:2701-2800batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=68.157, loss_att=49.050, acc=0.717, loss=54.782, backward_time=1.051, grad_norm=120.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=3.048 +[gpub001:0/64] 2023-07-15 00:56:50,168 (trainer:732) INFO: 51epoch:train:2801-2900batch: iter_time=1.083e-04, forward_time=0.145, loss_ctc=61.833, loss_att=46.996, acc=0.700, loss=51.447, backward_time=1.048, grad_norm=110.089, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.233 +[gpub001:0/64] 2023-07-15 00:59:24,641 (trainer:732) INFO: 51epoch:train:2901-3000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=74.094, loss_att=49.328, acc=0.708, loss=56.758, backward_time=1.046, grad_norm=137.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.089 +[gpub001:0/64] 2023-07-15 01:01:53,489 (trainer:732) INFO: 51epoch:train:3001-3100batch: iter_time=1.135e-04, 
forward_time=0.145, loss_ctc=81.243, loss_att=67.309, acc=0.698, loss=71.489, backward_time=1.042, grad_norm=131.285, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=2.977 +[gpub001:0/64] 2023-07-15 01:04:27,494 (trainer:732) INFO: 51epoch:train:3101-3200batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=61.668, loss_att=44.652, acc=0.728, loss=49.757, backward_time=1.047, grad_norm=103.749, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=3.080 +[gpub001:0/64] 2023-07-15 01:06:55,343 (trainer:732) INFO: 51epoch:train:3201-3300batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=72.274, loss_att=53.996, acc=0.696, loss=59.479, backward_time=1.037, grad_norm=125.051, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=2.957 +[gpub001:0/64] 2023-07-15 01:08:01,140 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 01:08:19,465 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 01:08:22,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 01:13:05,704 (trainer:732) INFO: 51epoch:train:3301-3400batch: iter_time=2.009, forward_time=0.148, loss_ctc=75.223, loss_att=63.691, acc=0.710, loss=67.150, backward_time=1.051, grad_norm=142.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=7.407 +[gpub001:0/64] 2023-07-15 01:15:22,767 (trainer:732) INFO: 51epoch:train:3401-3500batch: iter_time=1.216e-04, forward_time=0.147, loss_ctc=74.723, loss_att=61.360, acc=0.699, loss=65.369, backward_time=1.033, grad_norm=144.345, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 01:17:38,745 (trainer:732) INFO: 51epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.675, loss_att=49.439, acc=0.718, loss=54.310, backward_time=1.028, grad_norm=123.402, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 01:19:54,187 (trainer:732) INFO: 51epoch:train:3601-3700batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=60.673, loss_att=46.051, acc=0.693, loss=50.437, backward_time=1.026, grad_norm=100.622, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 01:22:10,043 (trainer:732) INFO: 51epoch:train:3701-3800batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=77.507, loss_att=54.217, acc=0.713, loss=61.204, backward_time=1.029, grad_norm=155.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, 
train_time=2.717 +[gpub001:0/64] 2023-07-15 01:24:26,099 (trainer:732) INFO: 51epoch:train:3801-3900batch: iter_time=1.260e-04, forward_time=0.147, loss_ctc=77.820, loss_att=62.731, acc=0.702, loss=67.257, backward_time=1.031, grad_norm=134.212, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 01:26:42,048 (trainer:732) INFO: 51epoch:train:3901-4000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=64.291, loss_att=46.914, acc=0.712, loss=52.127, backward_time=1.028, grad_norm=125.741, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 01:28:57,962 (trainer:732) INFO: 51epoch:train:4001-4100batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.662, loss_att=55.242, acc=0.705, loss=59.868, backward_time=1.027, grad_norm=125.153, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.030e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 01:30:40,640 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 01:30:58,549 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 01:31:02,016 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 01:31:02,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 01:31:02,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 01:35:33,364 (trainer:732) INFO: 51epoch:train:4101-4200batch: iter_time=1.388, forward_time=0.180, loss_ctc=68.988, loss_att=53.972, acc=0.717, loss=58.477, backward_time=1.042, grad_norm=120.989, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.030e-05, train_time=7.908 +[gpub001:0/64] 2023-07-15 01:37:51,985 (trainer:732) INFO: 51epoch:train:4201-4300batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=74.833, loss_att=60.854, acc=0.715, loss=65.048, backward_time=1.037, grad_norm=133.761, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.772 +[gpub001:0/64] 2023-07-15 01:40:08,097 (trainer:732) INFO: 51epoch:train:4301-4400batch: iter_time=1.075e-04, forward_time=0.146, loss_ctc=69.438, loss_att=52.734, acc=0.721, loss=57.745, backward_time=1.029, grad_norm=123.930, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 01:42:25,511 (trainer:732) INFO: 51epoch:train:4401-4500batch: iter_time=1.121e-04, forward_time=0.155, loss_ctc=63.839, loss_att=48.472, acc=0.713, loss=53.082, backward_time=1.032, grad_norm=131.811, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.748 +[gpub001:0/64] 2023-07-15 01:44:40,951 (trainer:732) INFO: 51epoch:train:4501-4600batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=66.389, loss_att=47.906, acc=0.715, 
loss=53.451, backward_time=1.026, grad_norm=153.232, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 01:46:57,089 (trainer:732) INFO: 51epoch:train:4601-4700batch: iter_time=1.045e-04, forward_time=0.145, loss_ctc=83.957, loss_att=64.618, acc=0.707, loss=70.420, backward_time=1.030, grad_norm=166.949, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 01:49:12,741 (trainer:732) INFO: 51epoch:train:4701-4800batch: iter_time=9.881e-05, forward_time=0.145, loss_ctc=64.633, loss_att=48.052, acc=0.725, loss=53.027, backward_time=1.029, grad_norm=122.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 01:51:28,305 (trainer:732) INFO: 51epoch:train:4801-4900batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.030, loss_att=50.159, acc=0.722, loss=55.820, backward_time=1.027, grad_norm=117.971, clip=100.000, loss_scale=1.947e+32, optim_step_time=0.182, optim0_lr0=5.026e-05, train_time=2.711 +[gpub001:0/64] 2023-07-15 01:53:45,279 (trainer:732) INFO: 51epoch:train:4901-5000batch: iter_time=1.268e-04, forward_time=0.153, loss_ctc=70.500, loss_att=57.120, acc=0.709, loss=61.134, backward_time=1.030, grad_norm=119.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.026e-05, train_time=2.739 +[gpub001:0/64] 2023-07-15 01:53:49,960 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub001:0/64] 2023-07-15 01:54:07,620 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 01:54:11,028 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:00:36,494 (trainer:732) INFO: 51epoch:train:5001-5100batch: iter_time=1.321, forward_time=0.197, loss_ctc=71.796, loss_att=54.624, acc=0.720, loss=59.775, backward_time=1.042, grad_norm=142.285, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.025e-05, train_time=8.225 +[gpub001:0/64] 2023-07-15 02:02:53,486 (trainer:732) INFO: 51epoch:train:5101-5200batch: iter_time=1.192e-04, forward_time=0.148, loss_ctc=77.425, loss_att=64.117, acc=0.711, loss=68.109, backward_time=1.031, grad_norm=121.077, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 02:05:14,153 (trainer:732) INFO: 51epoch:train:5201-5300batch: iter_time=2.312e-04, forward_time=0.185, loss_ctc=68.549, loss_att=49.844, acc=0.725, loss=55.455, backward_time=1.034, grad_norm=126.463, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.024e-05, train_time=2.813 +[gpub001:0/64] 2023-07-15 02:07:30,780 
(trainer:732) INFO: 51epoch:train:5301-5400batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=61.261, loss_att=47.446, acc=0.706, loss=51.590, backward_time=1.030, grad_norm=122.414, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.024e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 02:09:46,510 (trainer:732) INFO: 51epoch:train:5401-5500batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=71.938, loss_att=49.005, acc=0.717, loss=55.885, backward_time=1.027, grad_norm=145.363, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 02:12:02,889 (trainer:732) INFO: 51epoch:train:5501-5600batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=80.684, loss_att=65.569, acc=0.713, loss=70.103, backward_time=1.031, grad_norm=154.669, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 02:14:18,639 (trainer:732) INFO: 51epoch:train:5601-5700batch: iter_time=1.232e-04, forward_time=0.145, loss_ctc=60.971, loss_att=43.444, acc=0.734, loss=48.702, backward_time=1.028, grad_norm=110.491, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.022e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 02:16:42,195 (trainer:732) INFO: 51epoch:train:5701-5800batch: iter_time=1.226e-04, forward_time=0.206, loss_ctc=71.178, loss_att=52.973, acc=0.713, loss=58.434, backward_time=1.033, grad_norm=126.966, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.022e-05, train_time=2.870 +[gpub001:0/64] 2023-07-15 02:17:48,783 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 02:18:06,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 02:18:10,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:24:23,285 (trainer:732) INFO: 51epoch:train:5801-5900batch: iter_time=3.167, forward_time=0.193, loss_ctc=73.954, loss_att=58.719, acc=0.720, loss=63.289, backward_time=1.046, grad_norm=120.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.021e-05, train_time=9.221 +[gpub001:0/64] 2023-07-15 02:26:42,799 (trainer:732) INFO: 51epoch:train:5901-6000batch: iter_time=1.365e-04, forward_time=0.148, loss_ctc=77.878, loss_att=59.660, acc=0.724, loss=65.125, backward_time=1.031, grad_norm=137.273, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.021e-05, train_time=2.791 +[gpub001:0/64] 2023-07-15 02:29:01,054 (trainer:732) INFO: 51epoch:train:6001-6100batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=71.477, loss_att=57.056, acc=0.717, loss=61.382, backward_time=1.032, grad_norm=132.136, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.020e-05, train_time=2.765 +[gpub001:0/64] 2023-07-15 02:31:22,810 (trainer:732) INFO: 51epoch:train:6101-6200batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=63.470, loss_att=47.131, acc=0.713, loss=52.033, backward_time=1.043, grad_norm=145.338, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.020e-05, train_time=2.835 +[gpub001:0/64] 2023-07-15 02:33:49,092 (trainer:732) INFO: 51epoch:train:6201-6300batch: iter_time=1.456e-04, forward_time=0.146, loss_ctc=63.174, loss_att=46.054, acc=0.716, loss=51.190, backward_time=1.040, grad_norm=140.173, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.019e-05, train_time=2.925 +[gpub001:0/64] 2023-07-15 02:36:09,198 (trainer:732) INFO: 51epoch:train:6301-6400batch: iter_time=9.504e-05, forward_time=0.145, loss_ctc=81.068, loss_att=60.989, acc=0.711, loss=67.012, backward_time=1.039, grad_norm=146.913, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.019e-05, train_time=2.802 +[gpub001:0/64] 2023-07-15 02:38:28,073 (trainer:732) INFO: 51epoch:train:6401-6500batch: iter_time=9.582e-05, forward_time=0.145, loss_ctc=71.425, loss_att=53.746, acc=0.727, loss=59.050, backward_time=1.032, grad_norm=128.568, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.018e-05, train_time=2.777 +[gpub001:0/64] 2023-07-15 02:40:43,948 (trainer:732) INFO: 51epoch:train:6501-6600batch: iter_time=1.048e-04, forward_time=0.144, loss_ctc=67.003, loss_att=49.566, acc=0.718, loss=54.797, backward_time=1.026, grad_norm=141.281, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.018e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 02:42:32,538 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/64] 2023-07-15 02:42:50,762 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 02:42:54,192 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:47:36,979 (trainer:732) INFO: 51epoch:train:6601-6700batch: iter_time=1.585, forward_time=0.162, loss_ctc=70.032, loss_att=58.956, acc=0.708, loss=62.279, backward_time=1.037, grad_norm=139.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.017e-05, train_time=8.260 +[gpub001:0/64] 2023-07-15 02:49:54,229 (trainer:732) INFO: 51epoch:train:6701-6800batch: iter_time=1.289e-04, forward_time=0.147, loss_ctc=75.545, loss_att=58.495, acc=0.711, loss=63.610, backward_time=1.033, grad_norm=141.681, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.017e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 02:52:10,761 (trainer:732) INFO: 51epoch:train:6801-6900batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=74.367, loss_att=61.655, acc=0.708, loss=65.468, backward_time=1.029, grad_norm=128.279, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 02:54:26,812 (trainer:732) INFO: 51epoch:train:6901-7000batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=65.571, loss_att=49.989, acc=0.708, loss=54.664, backward_time=1.028, grad_norm=112.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 02:56:42,153 (trainer:732) INFO: 51epoch:train:7001-7100batch: iter_time=1.037e-04, forward_time=0.144, loss_ctc=56.492, loss_att=43.028, acc=0.701, loss=47.067, backward_time=1.025, grad_norm=116.572, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 02:58:58,134 (trainer:732) INFO: 51epoch:train:7101-7200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.088, loss_att=59.885, acc=0.716, loss=67.146, backward_time=1.030, grad_norm=159.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 03:01:13,992 (trainer:732) INFO: 51epoch:train:7201-7300batch: iter_time=1.333e-04, forward_time=0.145, loss_ctc=72.488, loss_att=55.906, acc=0.708, loss=60.881, backward_time=1.029, grad_norm=126.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.014e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 03:03:29,592 (trainer:732) INFO: 51epoch:train:7301-7400batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=67.910, loss_att=51.640, acc=0.710, loss=56.521, backward_time=1.028, grad_norm=112.939, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.014e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:05:45,812 (trainer:732) INFO: 51epoch:train:7401-7500batch: iter_time=9.587e-05, forward_time=0.146, loss_ctc=69.895, loss_att=58.438, acc=0.703, loss=61.875, backward_time=1.030, grad_norm=112.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 03:06:00,836 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 03:06:19,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:06:22,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:06:22,593 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 03:06:22,599 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:12:16,439 (trainer:732) INFO: 51epoch:train:7501-7600batch: iter_time=2.393, forward_time=0.145, loss_ctc=71.969, loss_att=54.560, acc=0.721, loss=59.782, backward_time=1.046, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=7.812 +[gpub001:0/64] 2023-07-15 03:14:33,471 (trainer:732) INFO: 51epoch:train:7601-7700batch: iter_time=1.093e-04, forward_time=0.147, loss_ctc=76.969, loss_att=62.660, acc=0.719, loss=66.953, backward_time=1.033, grad_norm=136.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 03:16:50,003 (trainer:732) INFO: 51epoch:train:7701-7800batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=67.626, loss_att=48.666, acc=0.727, loss=54.354, backward_time=1.031, grad_norm=124.562, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 03:19:06,142 (trainer:732) INFO: 51epoch:train:7801-7900batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=61.458, loss_att=47.781, acc=0.708, loss=51.884, backward_time=1.029, grad_norm=132.908, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 03:21:21,748 (trainer:732) INFO: 51epoch:train:7901-8000batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=73.377, loss_att=49.099, acc=0.720, loss=56.382, backward_time=1.026, grad_norm=145.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:23:41,893 (trainer:732) INFO: 51epoch:train:8001-8100batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=80.253, loss_att=65.949, acc=0.710, loss=70.240, backward_time=1.043, grad_norm=118.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.803 +[gpub001:0/64] 2023-07-15 03:25:57,929 (trainer:732) INFO: 51epoch:train:8101-8200batch: iter_time=1.161e-04, forward_time=0.146, loss_ctc=59.833, 
loss_att=43.945, acc=0.737, loss=48.712, backward_time=1.028, grad_norm=97.269, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 03:28:22,509 (trainer:732) INFO: 51epoch:train:8201-8300batch: iter_time=4.203e-04, forward_time=0.206, loss_ctc=70.476, loss_att=52.382, acc=0.712, loss=57.810, backward_time=1.036, grad_norm=124.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.009e-05, train_time=2.891 +[gpub001:0/64] 2023-07-15 03:29:12,660 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 03:29:30,661 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 03:29:34,364 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:34:24,246 (trainer:732) INFO: 51epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.183, loss_ctc=74.757, loss_att=62.043, acc=0.719, loss=65.857, backward_time=1.043, grad_norm=136.488, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.009e-05, train_time=7.234 +[gpub001:0/64] 2023-07-15 03:36:41,508 (trainer:732) INFO: 51epoch:train:8401-8500batch: iter_time=9.487e-05, forward_time=0.147, loss_ctc=73.152, loss_att=60.272, acc=0.703, loss=64.136, backward_time=1.032, grad_norm=130.904, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 03:38:58,154 (trainer:732) INFO: 51epoch:train:8501-8600batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=66.048, loss_att=49.450, acc=0.717, loss=54.429, backward_time=1.029, grad_norm=115.954, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 03:41:13,765 (trainer:732) INFO: 51epoch:train:8601-8700batch: iter_time=9.860e-05, forward_time=0.143, loss_ctc=60.580, loss_att=45.944, acc=0.699, loss=50.334, backward_time=1.024, grad_norm=132.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.007e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:43:30,063 (trainer:732) INFO: 51epoch:train:8701-8800batch: iter_time=1.299e-04, forward_time=0.147, loss_ctc=75.928, loss_att=53.765, acc=0.716, loss=60.414, backward_time=1.030, grad_norm=155.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 03:45:21,567 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
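The WARNING just above ("The grad norm is nan. Skipping updating the model.") is the standard fp16 dynamic-loss-scaling pattern: an overflow surfaces as a non-finite gradient norm, the optimizer step is skipped, and the scale is halved before training continues (visible later in this log when loss_scale drops from 3.245e+32 to 1.623e+32). A minimal sketch of that control flow using PyTorch's GradScaler, not ESPnet's actual trainer code; the max_norm of 100 is an assumption read off the constant clip=100.000 column:

```python
import torch

scaler = torch.cuda.amp.GradScaler()  # dynamic loss scaling

def train_step(model, optimizer, loss, max_norm=100.0):
    """Skip the parameter update when the unscaled grad norm is non-finite (sketch)."""
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # gradients back in real (unscaled) units
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if not torch.isfinite(grad_norm):
        # Mirrors the log's warning; the parameters are left untouched.
        print("The grad norm is nan. Skipping updating the model.")
    else:
        scaler.step(optimizer)
    scaler.update()  # halves the scale after an overflow, grows it otherwise
```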
+[gpub001:0/64] 2023-07-15 03:45:45,981 (trainer:732) INFO: 51epoch:train:8801-8900batch: iter_time=1.266e-04, forward_time=0.147, loss_ctc=77.868, loss_att=61.551, acc=0.707, loss=66.446, backward_time=1.029, grad_norm=134.524, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.006e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 03:48:03,191 (trainer:732) INFO: 51epoch:train:8901-9000batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=64.524, loss_att=46.573, acc=0.715, loss=51.958, backward_time=1.029, grad_norm=138.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.744 +[gpub001:0/64] 2023-07-15 03:50:20,560 (trainer:732) INFO: 51epoch:train:9001-9100batch: iter_time=1.038e-04, forward_time=0.146, loss_ctc=70.040, loss_att=54.802, acc=0.706, loss=59.373, backward_time=1.032, grad_norm=151.464, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=2.747 +[gpub001:0/64] 2023-07-15 03:51:53,225 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub001:0/64] 2023-07-15 03:52:11,289 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub001:0/64] 2023-07-15 03:52:14,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:57:27,451 (trainer:732) INFO: 51epoch:train:9101-9200batch: iter_time=1.306, forward_time=0.144, loss_ctc=65.711, loss_att=50.921, acc=0.714, loss=55.358, backward_time=1.041, grad_norm=118.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=8.538 +[gpub001:0/64] 2023-07-15 03:59:49,919 (trainer:732) INFO: 51epoch:train:9201-9300batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.976, loss_att=57.729, acc=0.724, loss=63.203, backward_time=1.042, grad_norm=148.716, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.004e-05, train_time=2.849 +[gpub001:0/64] 2023-07-15 04:02:07,659 (trainer:732) INFO: 51epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.148, loss_ctc=74.558, loss_att=59.518, acc=0.719, loss=64.030, backward_time=1.034, grad_norm=124.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.755 +[gpub001:0/64] 2023-07-15 04:04:27,052 (trainer:732) INFO: 51epoch:train:9401-9500batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.717, loss_att=49.875, acc=0.718, loss=54.328, backward_time=1.038, grad_norm=132.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.788 +[gpub001:0/64] 2023-07-15 04:06:45,446 (trainer:732) INFO: 51epoch:train:9501-9600batch: iter_time=9.616e-05, forward_time=0.145, loss_ctc=55.849, loss_att=42.233, acc=0.712, loss=46.317, 
backward_time=1.030, grad_norm=119.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.768 +[gpub001:0/64] 2023-07-15 04:09:01,853 (trainer:732) INFO: 51epoch:train:9601-9700batch: iter_time=1.058e-04, forward_time=0.147, loss_ctc=82.299, loss_att=58.433, acc=0.727, loss=65.593, backward_time=1.032, grad_norm=158.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.728 +[gpub001:0/64] 2023-07-15 04:11:21,471 (trainer:732) INFO: 51epoch:train:9701-9800batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=73.290, loss_att=56.427, acc=0.712, loss=61.486, backward_time=1.036, grad_norm=130.108, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.792 +[gpub001:0/64] 2023-07-15 04:13:37,214 (trainer:732) INFO: 51epoch:train:9801-9900batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=67.601, loss_att=50.730, acc=0.720, loss=55.791, backward_time=1.028, grad_norm=117.358, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 04:15:53,312 (trainer:732) INFO: 51epoch:train:9901-10000batch: iter_time=9.479e-05, forward_time=0.147, loss_ctc=68.884, loss_att=56.769, acc=0.722, loss=60.403, backward_time=1.029, grad_norm=111.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.000e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 04:28:17,825 (trainer:338) INFO: 51epoch results: [train] iter_time=0.271, forward_time=0.150, loss_ctc=70.867, loss_att=54.317, acc=0.712, loss=59.282, backward_time=1.033, grad_norm=130.980, clip=100.000, loss_scale=2.538e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=3.480, time=4 hours, 50 minutes and 9.54 seconds, total_count=480000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.872, cer_ctc=0.251, loss_att=38.738, acc=0.678, cer=0.404, wer=0.996, loss=39.978, time=6 minutes and 25.08 seconds, total_count=49082, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-15 04:28:33,383 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/46epoch.pth +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:272) INFO: 52/60epoch started. Estimated time to finish: 1 day, 20 hours and 51 minutes +[gpub001:0/64] 2023-07-15 04:28:33,399 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
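The epoch-51 summary above makes the loss bookkeeping easy to check: every logged `loss` is a 0.3/0.7 blend of `loss_ctc` and `loss_att` (0.3 x 70.867 + 0.7 x 54.317 = 59.282 for the train averages; 0.3 x 42.872 + 0.7 x 38.738 = 39.978 for the valid set), i.e. a hybrid CTC/attention objective with CTC weight 0.3. The weight is inferred from the logged numbers, not read from the config file, so treat it as an assumption. A one-function sketch:

```python
def combined_loss(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    """Hybrid CTC/attention objective: w * ctc + (1 - w) * att (weight assumed)."""
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# Reproduce the epoch-51 summary values from the log.
assert abs(combined_loss(70.867, 54.317) - 59.282) < 1e-3   # [train]
assert abs(combined_loss(42.872, 38.738) - 39.978) < 1e-3   # [valid]
```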
+[gpub001:0/64] 2023-07-15 04:28:51,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 04:28:54,534 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:35:34,681 (trainer:732) INFO: 52epoch:train:1-100batch: iter_time=2.800, forward_time=0.165, loss_ctc=69.950, loss_att=52.193, acc=0.709, loss=57.520, backward_time=1.039, grad_norm=115.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.000e-05, train_time=8.425 +[gpub001:0/64] 2023-07-15 04:37:51,420 (trainer:732) INFO: 52epoch:train:101-200batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=69.058, loss_att=52.313, acc=0.715, loss=57.337, backward_time=1.032, grad_norm=143.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.999e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 04:40:09,108 (trainer:732) INFO: 52epoch:train:201-300batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=69.512, loss_att=51.083, acc=0.718, loss=56.611, backward_time=1.032, grad_norm=136.816, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.999e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 04:42:29,211 (trainer:732) INFO: 52epoch:train:301-400batch: iter_time=1.408e-04, forward_time=0.146, loss_ctc=77.821, loss_att=61.168, acc=0.703, loss=66.164, backward_time=1.035, grad_norm=151.115, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.802 +[gpub001:0/64] 2023-07-15 04:44:49,281 (trainer:732) INFO: 52epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=59.969, loss_att=45.427, acc=0.721, loss=49.790, backward_time=1.032, grad_norm=126.820, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.801 +[gpub001:0/64] 2023-07-15 04:47:08,640 (trainer:732) INFO: 52epoch:train:501-600batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=61.323, loss_att=48.328, acc=0.726, loss=52.227, backward_time=1.037, grad_norm=125.310, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 04:49:31,134 (trainer:732) INFO: 52epoch:train:601-700batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=75.112, loss_att=63.431, acc=0.711, loss=66.935, backward_time=1.054, grad_norm=142.319, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.850 +[gpub001:0/64] 2023-07-15 04:51:49,592 (trainer:732) INFO: 52epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=75.882, loss_att=61.266, acc=0.716, loss=65.651, backward_time=1.033, grad_norm=112.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, 
train_time=2.769 +[gpub001:0/64] 2023-07-15 04:52:41,328 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub001:0/64] 2023-07-15 04:52:59,420 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 04:53:02,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:57:23,419 (trainer:732) INFO: 52epoch:train:801-900batch: iter_time=1.338, forward_time=0.237, loss_ctc=68.461, loss_att=51.350, acc=0.709, loss=56.483, backward_time=1.050, grad_norm=146.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.996e-05, train_time=6.676 +[gpub001:0/64] 2023-07-15 04:59:41,599 (trainer:732) INFO: 52epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=69.156, loss_att=58.040, acc=0.702, loss=61.375, backward_time=1.029, grad_norm=139.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, train_time=2.764 +[gpub001:0/64] 2023-07-15 05:01:57,285 (trainer:732) INFO: 52epoch:train:1001-1100batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=71.143, loss_att=50.487, acc=0.721, loss=56.684, backward_time=1.029, grad_norm=117.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 05:04:13,647 (trainer:732) INFO: 52epoch:train:1101-1200batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.174, loss_att=57.692, acc=0.712, loss=62.637, backward_time=1.030, grad_norm=138.130, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:06:29,733 (trainer:732) INFO: 52epoch:train:1201-1300batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=57.362, loss_att=42.969, acc=0.723, loss=47.287, backward_time=1.029, grad_norm=118.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 05:08:45,836 (trainer:732) INFO: 52epoch:train:1301-1400batch: iter_time=1.205e-04, forward_time=0.149, loss_ctc=65.213, loss_att=52.131, acc=0.732, loss=56.055, backward_time=1.030, grad_norm=140.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 05:11:02,183 (trainer:732) INFO: 52epoch:train:1401-1500batch: iter_time=1.198e-04, forward_time=0.149, loss_ctc=76.107, loss_att=62.021, acc=0.715, loss=66.247, backward_time=1.032, grad_norm=127.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:13:18,216 (trainer:732) INFO: 52epoch:train:1501-1600batch: iter_time=1.083e-04, forward_time=0.148, loss_ctc=66.997, loss_att=54.395, acc=0.727, loss=58.175, 
backward_time=1.030, grad_norm=117.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 05:14:59,026 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-15 05:15:17,049 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 05:15:20,578 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:22:59,109 (trainer:732) INFO: 52epoch:train:1601-1700batch: iter_time=4.399, forward_time=0.169, loss_ctc=76.666, loss_att=57.348, acc=0.713, loss=63.143, backward_time=1.040, grad_norm=154.498, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.992e-05, train_time=11.617 +[gpub001:0/64] 2023-07-15 05:25:16,028 (trainer:732) INFO: 52epoch:train:1701-1800batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=68.875, loss_att=53.803, acc=0.711, loss=58.325, backward_time=1.031, grad_norm=128.396, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.992e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 05:27:31,940 (trainer:732) INFO: 52epoch:train:1801-1900batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=72.358, loss_att=52.034, acc=0.719, loss=58.132, backward_time=1.028, grad_norm=128.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:29:52,478 (trainer:732) INFO: 52epoch:train:1901-2000batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=74.207, loss_att=56.605, acc=0.713, loss=61.886, backward_time=1.047, grad_norm=146.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.811 +[gpub001:0/64] 2023-07-15 05:32:13,571 (trainer:732) INFO: 52epoch:train:2001-2100batch: iter_time=1.101e-04, forward_time=0.146, loss_ctc=60.906, loss_att=47.967, acc=0.726, loss=51.849, backward_time=1.035, grad_norm=150.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 05:34:34,015 (trainer:732) INFO: 52epoch:train:2101-2200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=59.010, loss_att=46.764, acc=0.727, loss=50.438, backward_time=1.032, grad_norm=205.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 05:37:07,034 (trainer:732) INFO: 52epoch:train:2201-2300batch: iter_time=0.005, forward_time=0.207, loss_ctc=72.052, loss_att=58.747, acc=0.714, loss=62.738, backward_time=1.065, grad_norm=153.377, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.212, optim0_lr0=4.989e-05, train_time=3.058 +[gpub001:0/64] 2023-07-15 05:39:24,955 (trainer:732) INFO: 
52epoch:train:2301-2400batch: iter_time=1.118e-04, forward_time=0.148, loss_ctc=77.542, loss_att=63.924, acc=0.722, loss=68.010, backward_time=1.031, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.989e-05, train_time=2.760 +[gpub001:0/64] 2023-07-15 05:41:45,240 (trainer:732) INFO: 52epoch:train:2401-2500batch: iter_time=1.128e-04, forward_time=0.144, loss_ctc=69.658, loss_att=49.632, acc=0.723, loss=55.639, backward_time=1.028, grad_norm=121.430, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.988e-05, train_time=2.805 +[gpub001:0/64] 2023-07-15 05:41:58,122 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 05:42:16,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 05:42:19,652 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:47:06,685 (trainer:732) INFO: 52epoch:train:2501-2600batch: iter_time=1.721, forward_time=0.145, loss_ctc=68.445, loss_att=51.045, acc=0.711, loss=56.265, backward_time=1.045, grad_norm=122.585, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.988e-05, train_time=6.429 +[gpub001:0/64] 2023-07-15 05:49:23,184 (trainer:732) INFO: 52epoch:train:2601-2700batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=68.564, loss_att=53.136, acc=0.713, loss=57.764, backward_time=1.031, grad_norm=117.186, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.987e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 05:51:38,947 (trainer:732) INFO: 52epoch:train:2701-2800batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=68.961, loss_att=48.677, acc=0.721, loss=54.762, backward_time=1.027, grad_norm=134.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.987e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 05:53:54,877 (trainer:732) INFO: 52epoch:train:2801-2900batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=77.652, loss_att=61.174, acc=0.707, loss=66.117, backward_time=1.028, grad_norm=169.753, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:56:10,325 (trainer:732) INFO: 52epoch:train:2901-3000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=59.198, loss_att=43.540, acc=0.729, loss=48.237, backward_time=1.026, grad_norm=130.238, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 05:58:26,241 (trainer:732) INFO: 52epoch:train:3001-3100batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=61.246, loss_att=47.337, acc=0.732, loss=51.510, backward_time=1.029, grad_norm=113.002, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:00:42,516 (trainer:732) INFO: 52epoch:train:3101-3200batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=73.840, loss_att=62.268, acc=0.715, loss=65.740, backward_time=1.030, grad_norm=132.126, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 06:02:01,422 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub001:0/64] 2023-07-15 06:02:58,632 (trainer:732) INFO: 52epoch:train:3201-3300batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=72.796, loss_att=59.618, acc=0.723, loss=63.572, backward_time=1.031, grad_norm=118.588, clip=100.000, loss_scale=5.100e+32, optim_step_time=0.182, optim0_lr0=4.984e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:03:46,609 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 06:04:04,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 06:04:08,408 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:08:26,592 (trainer:732) INFO: 52epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.146, loss_ctc=67.963, loss_att=50.761, acc=0.709, loss=55.921, backward_time=1.042, grad_norm=138.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.984e-05, train_time=6.559 +[gpub001:0/64] 2023-07-15 06:10:43,347 (trainer:732) INFO: 52epoch:train:3401-3500batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=68.987, loss_att=57.616, acc=0.701, loss=61.027, backward_time=1.030, grad_norm=121.698, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 06:12:59,043 (trainer:732) INFO: 52epoch:train:3501-3600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=70.640, loss_att=49.445, acc=0.720, loss=55.804, backward_time=1.027, grad_norm=163.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 06:15:17,290 (trainer:732) INFO: 52epoch:train:3601-3700batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=72.309, loss_att=57.367, acc=0.701, loss=61.849, backward_time=1.036, grad_norm=192.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.765 +[gpub001:0/64] 2023-07-15 06:17:32,908 (trainer:732) INFO: 52epoch:train:3701-3800batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=56.135, loss_att=43.432, acc=0.725, loss=47.242, backward_time=1.027, grad_norm=120.250, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.712 
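The `loss_scale` column in the surrounding windows is easiest to read as a per-window mean: the underlying scale moves in powers of two (3.245e+32 doubling to 6.490e+32, or halving to 1.623e+32 after an overflow), and in-between values such as 3.829e+32 or 5.100e+32 appear exactly when the change lands inside a 100-batch reporting window. Treating the logged value as that mean is an assumption about the reporter, but it is consistent with every value in this log, and it lets the switch point be recovered:

```python
# Assumption: the logged loss_scale is the mean over the 100-batch window.
# Solve for how many batches ran at the old scale before it changed.
def switch_batch(old: float, new: float, logged_mean: float, window: int = 100) -> float:
    return window * (new - logged_mean) / (new - old)

print(switch_batch(3.245e32, 6.490e32, 3.829e32))  # ~82: doubled late in window 2801-2900
print(switch_batch(6.490e32, 3.245e32, 5.100e32))  # ~57: halved mid-window after the nan skip
```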
+[gpub001:0/64] 2023-07-15 06:19:48,547 (trainer:732) INFO: 52epoch:train:3801-3900batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=64.984, loss_att=53.140, acc=0.717, loss=56.693, backward_time=1.026, grad_norm=124.613, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 06:22:04,604 (trainer:732) INFO: 52epoch:train:3901-4000batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=75.460, loss_att=63.239, acc=0.704, loss=66.905, backward_time=1.030, grad_norm=124.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 06:24:20,473 (trainer:732) INFO: 52epoch:train:4001-4100batch: iter_time=1.162e-04, forward_time=0.145, loss_ctc=68.466, loss_att=55.208, acc=0.726, loss=59.186, backward_time=1.028, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.980e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 06:25:55,117 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 06:26:13,418 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 06:26:16,923 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:30:23,969 (trainer:732) INFO: 52epoch:train:4101-4200batch: iter_time=1.316, forward_time=0.182, loss_ctc=70.188, loss_att=50.115, acc=0.713, loss=56.137, backward_time=1.038, grad_norm=155.015, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.980e-05, train_time=7.268 +[gpub001:0/64] 2023-07-15 06:32:52,217 (trainer:732) INFO: 52epoch:train:4201-4300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=69.316, loss_att=51.445, acc=0.717, loss=56.806, backward_time=1.043, grad_norm=137.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.966 +[gpub001:0/64] 2023-07-15 06:35:08,342 (trainer:732) INFO: 52epoch:train:4301-4400batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=72.977, loss_att=57.668, acc=0.705, loss=62.261, backward_time=1.029, grad_norm=141.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:37:26,814 (trainer:732) INFO: 52epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=63.643, loss_att=45.419, acc=0.719, loss=50.886, backward_time=1.056, grad_norm=113.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=2.769 +[gpub001:0/64] 2023-07-15 06:39:57,698 (trainer:732) INFO: 52epoch:train:4501-4600batch: iter_time=1.134e-04, forward_time=0.147, loss_ctc=75.027, loss_att=58.300, acc=0.701, loss=63.318, 
backward_time=1.043, grad_norm=180.474, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=3.017 +[gpub001:0/64] 2023-07-15 06:42:13,491 (trainer:732) INFO: 52epoch:train:4601-4700batch: iter_time=1.131e-04, forward_time=0.147, loss_ctc=56.007, loss_att=43.374, acc=0.727, loss=47.164, backward_time=1.027, grad_norm=140.554, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.977e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:44:29,393 (trainer:732) INFO: 52epoch:train:4701-4800batch: iter_time=1.151e-04, forward_time=0.147, loss_ctc=67.817, loss_att=52.699, acc=0.717, loss=57.235, backward_time=1.029, grad_norm=125.462, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.977e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:46:45,318 (trainer:732) INFO: 52epoch:train:4801-4900batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=73.382, loss_att=62.579, acc=0.709, loss=65.820, backward_time=1.029, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:49:01,133 (trainer:732) INFO: 52epoch:train:4901-5000batch: iter_time=1.282e-04, forward_time=0.148, loss_ctc=67.029, loss_att=51.868, acc=0.723, loss=56.416, backward_time=1.028, grad_norm=126.438, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:49:16,564 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub001:0/64] 2023-07-15 06:49:34,812 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-15 06:49:38,224 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:55:32,657 (trainer:732) INFO: 52epoch:train:5001-5100batch: iter_time=2.462, forward_time=0.167, loss_ctc=69.941, loss_att=52.394, acc=0.707, loss=57.658, backward_time=1.039, grad_norm=123.555, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=7.830 +[gpub001:0/64] 2023-07-15 06:57:48,838 (trainer:732) INFO: 52epoch:train:5101-5200batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=67.509, loss_att=53.022, acc=0.712, loss=57.368, backward_time=1.027, grad_norm=124.915, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.975e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:00:08,648 (trainer:732) INFO: 52epoch:train:5201-5300batch: iter_time=1.262e-04, forward_time=0.145, loss_ctc=66.544, loss_att=46.869, acc=0.728, loss=52.771, backward_time=1.037, grad_norm=123.961, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:02:28,465 (trainer:732) INFO: 
52epoch:train:5301-5400batch: iter_time=1.234e-04, forward_time=0.147, loss_ctc=76.176, loss_att=59.538, acc=0.700, loss=64.529, backward_time=1.041, grad_norm=207.376, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:04:57,605 (trainer:732) INFO: 52epoch:train:5401-5500batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=59.021, loss_att=44.631, acc=0.724, loss=48.948, backward_time=1.045, grad_norm=134.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.983 +[gpub001:0/64] 2023-07-15 07:07:19,312 (trainer:732) INFO: 52epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=60.113, loss_att=46.606, acc=0.727, loss=50.658, backward_time=1.044, grad_norm=107.221, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.834 +[gpub001:0/64] 2023-07-15 07:09:42,242 (trainer:732) INFO: 52epoch:train:5601-5700batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=74.213, loss_att=63.764, acc=0.702, loss=66.899, backward_time=1.047, grad_norm=124.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.858 +[gpub001:0/64] 2023-07-15 07:11:58,361 (trainer:732) INFO: 52epoch:train:5701-5800batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=72.924, loss_att=58.764, acc=0.720, loss=63.012, backward_time=1.029, grad_norm=109.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 07:12:46,518 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 07:13:04,771 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 07:13:08,297 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:17:17,109 (trainer:732) INFO: 52epoch:train:5801-5900batch: iter_time=1.320, forward_time=0.194, loss_ctc=67.235, loss_att=50.707, acc=0.707, loss=55.665, backward_time=1.041, grad_norm=126.723, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.971e-05, train_time=6.375 +[gpub001:0/64] 2023-07-15 07:19:34,139 (trainer:732) INFO: 52epoch:train:5901-6000batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=67.571, loss_att=55.834, acc=0.707, loss=59.355, backward_time=1.028, grad_norm=119.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.971e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 07:21:50,446 (trainer:732) INFO: 52epoch:train:6001-6100batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=69.750, loss_att=48.735, acc=0.721, loss=55.039, backward_time=1.028, grad_norm=137.401, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 07:24:05,901 (trainer:732) INFO: 52epoch:train:6101-6200batch: iter_time=1.260e-04, forward_time=0.143, loss_ctc=74.044, loss_att=56.325, acc=0.704, loss=61.641, backward_time=1.026, grad_norm=136.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 07:26:21,636 (trainer:732) INFO: 52epoch:train:6201-6300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=55.855, loss_att=43.352, acc=0.724, loss=47.103, backward_time=1.028, grad_norm=115.672, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 07:28:42,756 (trainer:732) INFO: 52epoch:train:6301-6400batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.969, loss_att=51.098, acc=0.723, loss=54.659, backward_time=1.046, grad_norm=130.164, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 07:31:00,939 (trainer:732) INFO: 52epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.147, loss_ctc=75.551, loss_att=63.770, acc=0.701, loss=67.304, backward_time=1.032, grad_norm=129.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.968e-05, train_time=2.763 +[gpub001:0/64] 2023-07-15 07:33:28,577 (trainer:732) INFO: 52epoch:train:6501-6600batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=68.149, loss_att=54.330, acc=0.725, loss=58.476, backward_time=1.036, grad_norm=113.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.968e-05, train_time=2.953 +[gpub001:0/64] 2023-07-15 07:35:03,598 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/64] 2023-07-15 07:35:21,711 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-15 07:35:25,101 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:39:15,653 (trainer:732) INFO: 52epoch:train:6601-6700batch: iter_time=1.307, forward_time=0.166, loss_ctc=70.728, loss_att=49.460, acc=0.717, loss=55.840, backward_time=1.039, grad_norm=123.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=6.941 +[gpub001:0/64] 2023-07-15 07:41:33,326 (trainer:732) INFO: 52epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.145, loss_ctc=68.471, loss_att=51.019, acc=0.714, loss=56.255, backward_time=1.034, grad_norm=118.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 07:43:49,511 (trainer:732) INFO: 52epoch:train:6801-6900batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=72.102, loss_att=56.769, acc=0.710, loss=61.369, backward_time=1.028, grad_norm=153.381, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:46:08,170 (trainer:732) INFO: 52epoch:train:6901-7000batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=62.933, loss_att=45.330, acc=0.724, loss=50.611, backward_time=1.039, grad_norm=115.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.773 +[gpub001:0/64] 2023-07-15 07:48:28,609 (trainer:732) INFO: 52epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=76.028, loss_att=57.030, acc=0.707, loss=62.729, backward_time=1.034, grad_norm=162.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 07:50:46,730 (trainer:732) INFO: 52epoch:train:7101-7200batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=56.737, loss_att=43.740, acc=0.729, loss=47.639, backward_time=1.031, grad_norm=117.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.762 +[gpub001:0/64] 2023-07-15 07:53:15,548 (trainer:732) INFO: 52epoch:train:7201-7300batch: iter_time=1.316e-04, forward_time=0.146, loss_ctc=67.374, loss_att=53.427, acc=0.716, loss=57.611, backward_time=1.037, grad_norm=133.942, clip=100.000, loss_scale=4.608e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.976 +[gpub001:0/64] 2023-07-15 07:55:26,256 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 07:55:39,865 (trainer:732) INFO: 52epoch:train:7301-7400batch: iter_time=1.232e-04, forward_time=0.148, loss_ctc=73.022, loss_att=61.882, acc=0.708, loss=65.224, backward_time=1.056, grad_norm=127.732, clip=100.000, loss_scale=6.159e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.886 +[gpub001:0/64] 2023-07-15 07:58:00,543 (trainer:732) INFO: 52epoch:train:7401-7500batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=65.887, loss_att=51.810, acc=0.721, loss=56.033, backward_time=1.031, grad_norm=112.643, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=2.813 +[gpub001:0/64] 2023-07-15 07:58:11,614 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 07:58:29,689 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 07:58:33,144 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:04:38,481 (trainer:732) INFO: 52epoch:train:7501-7600batch: iter_time=2.524, forward_time=0.173, loss_ctc=68.274, loss_att=52.729, acc=0.716, loss=57.393, backward_time=1.043, grad_norm=122.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=7.958 +[gpub001:0/64] 2023-07-15 08:06:55,378 (trainer:732) INFO: 52epoch:train:7601-7700batch: iter_time=1.197e-04, forward_time=0.150, loss_ctc=66.701, loss_att=51.740, acc=0.717, loss=56.228, backward_time=1.030, grad_norm=132.874, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.962e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 08:09:14,728 (trainer:732) INFO: 52epoch:train:7701-7800batch: iter_time=1.194e-04, forward_time=0.163, loss_ctc=66.481, loss_att=47.413, acc=0.730, loss=53.133, backward_time=1.030, grad_norm=123.853, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=4.962e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 08:11:09,763 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 08:11:31,528 (trainer:732) INFO: 52epoch:train:7801-7900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=76.969, loss_att=61.099, acc=0.712, loss=65.860, backward_time=1.030, grad_norm=147.788, clip=100.000, loss_scale=2.980e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:13:47,018 (trainer:732) INFO: 52epoch:train:7901-8000batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=58.050, loss_att=42.970, acc=0.735, loss=47.494, backward_time=1.025, grad_norm=116.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 08:16:02,954 (trainer:732) INFO: 52epoch:train:8001-8100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=60.651, loss_att=47.371, acc=0.734, loss=51.355, backward_time=1.027, grad_norm=123.406, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.960e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 08:18:19,062 (trainer:732) INFO: 52epoch:train:8101-8200batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=72.709, loss_att=62.133, acc=0.722, loss=65.306, backward_time=1.028, grad_norm=112.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 08:20:35,779 (trainer:732) INFO: 52epoch:train:8201-8300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=72.592, loss_att=59.622, acc=0.726, loss=63.513, backward_time=1.028, grad_norm=112.657, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 08:21:28,081 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 08:21:46,305 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub001:0/64] 2023-07-15 08:21:50,025 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:27:03,659 (trainer:732) INFO: 52epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.161, loss_ctc=63.582, loss_att=46.401, acc=0.711, loss=51.555, backward_time=1.043, grad_norm=110.495, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.959e-05, train_time=7.757 +[gpub001:0/64] 2023-07-15 08:29:20,503 (trainer:732) INFO: 52epoch:train:8401-8500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=71.706, loss_att=52.790, acc=0.718, loss=58.465, backward_time=1.029, grad_norm=132.484, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.959e-05, train_time=2.737 +[gpub001:0/64] 2023-07-15 08:31:36,508 (trainer:732) INFO: 52epoch:train:8501-8600batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=69.582, loss_att=53.043, acc=0.721, loss=58.005, 
backward_time=1.030, grad_norm=129.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:33:53,300 (trainer:732) INFO: 52epoch:train:8601-8700batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=63.441, loss_att=46.306, acc=0.719, loss=51.447, backward_time=1.030, grad_norm=134.435, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:36:12,547 (trainer:732) INFO: 52epoch:train:8701-8800batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=72.367, loss_att=57.240, acc=0.706, loss=61.778, backward_time=1.040, grad_norm=144.562, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 08:38:28,158 (trainer:732) INFO: 52epoch:train:8801-8900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=54.536, loss_att=41.153, acc=0.739, loss=45.168, backward_time=1.028, grad_norm=114.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 08:40:58,526 (trainer:732) INFO: 52epoch:train:8901-9000batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=69.535, loss_att=56.601, acc=0.709, loss=60.481, backward_time=1.044, grad_norm=139.440, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=3.007 +[gpub001:0/64] 2023-07-15 08:43:14,542 (trainer:732) INFO: 52epoch:train:9001-9100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.360, loss_att=63.389, acc=0.715, loss=67.281, backward_time=1.029, grad_norm=114.369, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:44:48,439 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub001:0/64] 2023-07-15 08:45:06,514 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 08:45:09,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 08:50:38,705 (trainer:732) INFO: 52epoch:train:9101-9200batch: iter_time=1.386, forward_time=0.204, loss_ctc=64.693, loss_att=46.910, acc=0.724, loss=52.245, backward_time=1.042, grad_norm=132.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.955e-05, train_time=8.882
+[gpub001:0/64] 2023-07-15 08:52:56,341 (trainer:732) INFO: 52epoch:train:9201-9300batch: iter_time=1.218e-04, forward_time=0.149, loss_ctc=67.583, loss_att=50.815, acc=0.725, loss=55.846, backward_time=1.032, grad_norm=120.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.955e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 08:55:13,695 (trainer:732) INFO: 52epoch:train:9301-9400batch: iter_time=1.130e-04, forward_time=0.149, loss_ctc=72.507, loss_att=57.099, acc=0.717, loss=61.721, backward_time=1.030, grad_norm=148.100, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 08:57:29,816 (trainer:732) INFO: 52epoch:train:9401-9500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=61.723, loss_att=44.944, acc=0.730, loss=49.978, backward_time=1.028, grad_norm=107.006, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 08:59:46,404 (trainer:732) INFO: 52epoch:train:9501-9600batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=73.882, loss_att=57.515, acc=0.715, loss=62.425, backward_time=1.031, grad_norm=134.396, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 09:02:02,343 (trainer:732) INFO: 52epoch:train:9601-9700batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=57.834, loss_att=43.892, acc=0.733, loss=48.075, backward_time=1.027, grad_norm=105.315, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 09:04:18,628 (trainer:732) INFO: 52epoch:train:9701-9800batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=69.046, loss_att=55.521, acc=0.724, loss=59.578, backward_time=1.028, grad_norm=134.539, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 09:06:40,864 (trainer:732) INFO: 52epoch:train:9801-9900batch: iter_time=1.086e-04, forward_time=0.146, loss_ctc=71.846, loss_att=59.131, acc=0.721, loss=62.945, backward_time=1.034, grad_norm=118.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.844
+[gpub001:0/64] 2023-07-15 09:08:57,505 (trainer:732) INFO: 52epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.148, loss_ctc=67.461, loss_att=52.187, acc=0.727, loss=56.769, backward_time=1.031, grad_norm=132.078, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.951e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 09:23:19,047 (trainer:338) INFO: 52epoch results: [train] iter_time=0.232, forward_time=0.151, loss_ctc=68.405, loss_att=53.118, acc=0.717, loss=57.704, backward_time=1.034, grad_norm=132.000, clip=100.000, loss_scale=3.065e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=3.365, time=4 hours, 40 minutes and 41.9 seconds, total_count=490000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.162, cer_ctc=0.252, loss_att=38.727, acc=0.677, cer=0.407, wer=0.996, loss=40.058, time=8 minutes and 9.5 seconds, total_count=50094, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 54.24 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 09:23:36,296 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/47epoch.pth
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:272) INFO: 53/60epoch started. Estimated time to finish: 1 day, 15 hours and 44 minutes
+[gpub001:0/64] 2023-07-15 09:23:37,760 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-15 09:23:55,704 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 09:23:59,041 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:31:30,451 (trainer:732) INFO: 53epoch:train:1-100batch: iter_time=3.311, forward_time=0.176, loss_ctc=77.249, loss_att=58.785, acc=0.708, loss=64.324, backward_time=1.043, grad_norm=155.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=4.951e-05, train_time=9.474
+[gpub001:0/64] 2023-07-15 09:33:55,969 (trainer:732) INFO: 53epoch:train:101-200batch: iter_time=1.288e-04, forward_time=0.189, loss_ctc=71.700, loss_att=52.162, acc=0.707, loss=58.023, backward_time=1.035, grad_norm=122.394, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.950e-05, train_time=2.911
+[gpub001:0/64] 2023-07-15 09:36:26,554 (trainer:732) INFO: 53epoch:train:201-300batch: iter_time=0.001, forward_time=0.236, loss_ctc=66.496, loss_att=46.521, acc=0.739, loss=52.513, backward_time=1.048, grad_norm=117.834, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.950e-05, train_time=3.011
+[gpub001:0/64] 2023-07-15 09:39:01,453 (trainer:732) INFO: 53epoch:train:301-400batch: iter_time=9.101e-04, forward_time=0.287, loss_ctc=75.211, loss_att=58.135, acc=0.711, loss=63.258, backward_time=1.053, grad_norm=108.586, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.190, optim0_lr0=4.949e-05, train_time=3.098
+[gpub001:0/64] 2023-07-15 09:41:30,212 (trainer:732) INFO: 53epoch:train:401-500batch: iter_time=3.592e-04, forward_time=0.238, loss_ctc=68.220, loss_att=49.582, acc=0.727, loss=55.173, backward_time=1.045, grad_norm=122.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.949e-05, train_time=2.975
+[gpub001:0/64] 2023-07-15 09:43:59,845 (trainer:732) INFO: 53epoch:train:501-600batch: iter_time=0.003, forward_time=0.238, loss_ctc=65.657, loss_att=47.631, acc=0.729, loss=53.039, backward_time=1.049, grad_norm=152.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:46:31,374 (trainer:732) INFO: 53epoch:train:601-700batch: iter_time=1.153e-04, forward_time=0.177, loss_ctc=72.494, loss_att=54.683, acc=0.710, loss=60.026, backward_time=1.055, grad_norm=148.168, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=3.031
+[gpub001:0/64] 2023-07-15 09:49:00,984 (trainer:732) INFO: 53epoch:train:701-800batch: iter_time=6.123e-04, forward_time=0.231, loss_ctc=65.196, loss_att=47.527, acc=0.726, loss=52.828, backward_time=1.042, grad_norm=130.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.947e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:49:58,288 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 09:50:16,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 09:50:19,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:58:24,759 (trainer:732) INFO: 53epoch:train:801-900batch: iter_time=4.199, forward_time=0.197, loss_ctc=70.653, loss_att=50.295, acc=0.719, loss=56.402, backward_time=1.041, grad_norm=118.018, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.947e-05, train_time=11.275
+[gpub001:0/64] 2023-07-15 10:00:41,776 (trainer:732) INFO: 53epoch:train:901-1000batch: iter_time=1.306e-04, forward_time=0.151, loss_ctc=77.224, loss_att=55.334, acc=0.705, loss=61.901, backward_time=1.032, grad_norm=132.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 10:02:57,402 (trainer:732) INFO: 53epoch:train:1001-1100batch: iter_time=1.317e-04, forward_time=0.148, loss_ctc=64.572, loss_att=46.014, acc=0.728, loss=51.581, backward_time=1.027, grad_norm=123.422, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 10:05:13,259 (trainer:732) INFO: 53epoch:train:1101-1200batch: iter_time=1.391e-04, forward_time=0.148, loss_ctc=73.301, loss_att=55.843, acc=0.715, loss=61.080, backward_time=1.029, grad_norm=135.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:07:28,967 (trainer:732) INFO: 53epoch:train:1201-1300batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=63.739, loss_att=47.917, acc=0.729, loss=52.664, backward_time=1.028, grad_norm=114.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 10:09:44,862 (trainer:732) INFO: 53epoch:train:1301-1400batch: iter_time=1.252e-04, forward_time=0.149, loss_ctc=67.179, loss_att=47.097, acc=0.730, loss=53.122, backward_time=1.028, grad_norm=116.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 10:12:00,515 (trainer:732) INFO: 53epoch:train:1401-1500batch: iter_time=1.255e-04, forward_time=0.148, loss_ctc=68.130, loss_att=49.836, acc=0.714, loss=55.324, backward_time=1.027, grad_norm=129.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 10:14:17,608 (trainer:732) INFO: 53epoch:train:1501-1600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=67.907, loss_att=53.286, acc=0.708, loss=57.672, backward_time=1.028, grad_norm=135.533, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 10:15:56,480 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
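Note: in each per-100-batch entry, the reported loss is consistent with the standard hybrid CTC/attention objective with a CTC weight of 0.3, i.e. loss = 0.3 * loss_ctc + 0.7 * loss_att (the weight is inferred from the logged numbers here, not read from the config). A quick check against two entries logged above:

    # Verify loss = 0.3 * loss_ctc + 0.7 * loss_att for logged entries
    # (0.3 is an assumed CTC weight that reproduces the logged totals).
    entries = [
        (63.441, 46.306, 51.447),  # 52epoch:train:8601-8700batch
        (72.367, 57.240, 61.778),  # 52epoch:train:8701-8800batch
    ]
    for loss_ctc, loss_att, logged in entries:
        assert abs(0.3 * loss_ctc + 0.7 * loss_att - logged) < 5e-3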
+[gpub001:0/64] 2023-07-15 10:16:14,877 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 10:16:18,387 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:20:43,071 (trainer:732) INFO: 53epoch:train:1601-1700batch: iter_time=1.340, forward_time=0.148, loss_ctc=70.666, loss_att=56.816, acc=0.715, loss=60.971, backward_time=1.036, grad_norm=162.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=7.709
+[gpub001:0/64] 2023-07-15 10:22:59,760 (trainer:732) INFO: 53epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=71.652, loss_att=52.533, acc=0.712, loss=58.269, backward_time=1.033, grad_norm=141.266, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 10:25:15,620 (trainer:732) INFO: 53epoch:train:1801-1900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=76.945, loss_att=54.785, acc=0.701, loss=61.433, backward_time=1.028, grad_norm=120.255, clip=100.000, loss_scale=1.882e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:27:32,921 (trainer:732) INFO: 53epoch:train:1901-2000batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=65.557, loss_att=44.420, acc=0.733, loss=50.761, backward_time=1.026, grad_norm=170.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:29:54,896 (trainer:732) INFO: 53epoch:train:2001-2100batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=78.932, loss_att=60.005, acc=0.708, loss=65.683, backward_time=1.032, grad_norm=154.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 10:32:19,721 (trainer:732) INFO: 53epoch:train:2101-2200batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=65.440, loss_att=48.548, acc=0.725, loss=53.615, backward_time=1.039, grad_norm=135.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.896
+[gpub001:0/64] 2023-07-15 10:34:37,291 (trainer:732) INFO: 53epoch:train:2201-2300batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=63.624, loss_att=44.869, acc=0.738, loss=50.495, backward_time=1.031, grad_norm=144.749, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 10:36:56,681 (trainer:732) INFO: 53epoch:train:2301-2400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.643, loss_att=51.467, acc=0.712, loss=57.520, backward_time=1.028, grad_norm=180.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.788
+[gpub001:0/64] 2023-07-15 10:39:14,003 (trainer:732) INFO: 53epoch:train:2401-2500batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=64.355, loss_att=47.519, acc=0.719, loss=52.570, backward_time=1.029, grad_norm=114.242, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.939e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:39:17,582 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 10:39:35,923 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 10:39:39,461 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:44:46,978 (trainer:732) INFO: 53epoch:train:2501-2600batch: iter_time=1.885, forward_time=0.176, loss_ctc=73.411, loss_att=53.483, acc=0.721, loss=59.461, backward_time=1.044, grad_norm=123.712, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.939e-05, train_time=6.659
+[gpub001:0/64] 2023-07-15 10:47:15,365 (trainer:732) INFO: 53epoch:train:2601-2700batch: iter_time=1.000e-04, forward_time=0.145, loss_ctc=75.773, loss_att=56.053, acc=0.699, loss=61.969, backward_time=1.046, grad_norm=116.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.968
+[gpub001:0/64] 2023-07-15 10:49:45,331 (trainer:732) INFO: 53epoch:train:2701-2800batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=65.403, loss_att=45.525, acc=0.728, loss=51.488, backward_time=1.046, grad_norm=116.454, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.999
+[gpub001:0/64] 2023-07-15 10:52:05,173 (trainer:732) INFO: 53epoch:train:2801-2900batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=73.294, loss_att=53.142, acc=0.721, loss=59.188, backward_time=1.038, grad_norm=148.478, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 10:54:39,743 (trainer:732) INFO: 53epoch:train:2901-3000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=67.600, loss_att=51.971, acc=0.725, loss=56.660, backward_time=1.042, grad_norm=138.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=3.091
+[gpub001:0/64] 2023-07-15 10:56:58,255 (trainer:732) INFO: 53epoch:train:3001-3100batch: iter_time=1.071e-04, forward_time=0.144, loss_ctc=65.833, loss_att=46.073, acc=0.732, loss=52.001, backward_time=1.031, grad_norm=112.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.770
+[gpub001:0/64] 2023-07-15 10:59:19,483 (trainer:732) INFO: 53epoch:train:3101-3200batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=66.802, loss_att=49.191, acc=0.717, loss=54.475, backward_time=1.039, grad_norm=159.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.824
+[gpub001:0/64] 2023-07-15 11:00:27,528 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 11:01:42,885 (trainer:732) INFO: 53epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.145, loss_ctc=69.338, loss_att=52.658, acc=0.720, loss=57.662, backward_time=1.040, grad_norm=112.301, clip=100.000, loss_scale=2.351e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=2.868
+[gpub001:0/64] 2023-07-15 11:02:40,035 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 11:02:58,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 11:03:01,537 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:07:57,739 (trainer:732) INFO: 53epoch:train:3301-3400batch: iter_time=2.006, forward_time=0.145, loss_ctc=69.350, loss_att=48.327, acc=0.728, loss=54.634, backward_time=1.039, grad_norm=118.392, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=7.497
+[gpub001:0/64] 2023-07-15 11:10:14,439 (trainer:732) INFO: 53epoch:train:3401-3500batch: iter_time=1.282e-04, forward_time=0.147, loss_ctc=77.528, loss_att=60.141, acc=0.707, loss=65.357, backward_time=1.032, grad_norm=137.504, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 11:12:30,025 (trainer:732) INFO: 53epoch:train:3501-3600batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=67.358, loss_att=47.997, acc=0.719, loss=53.805, backward_time=1.026, grad_norm=117.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:14:48,416 (trainer:732) INFO: 53epoch:train:3601-3700batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=73.623, loss_att=53.854, acc=0.738, loss=59.785, backward_time=1.029, grad_norm=143.444, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:17:21,226 (trainer:732) INFO: 53epoch:train:3701-3800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=70.255, loss_att=50.718, acc=0.718, loss=56.579, backward_time=1.047, grad_norm=167.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=3.056
+[gpub001:0/64] 2023-07-15 11:19:38,219 (trainer:732) INFO: 53epoch:train:3801-3900batch: iter_time=1.312e-04, forward_time=0.148, loss_ctc=65.971, loss_att=48.571, acc=0.738, loss=53.791, backward_time=1.029, grad_norm=126.572, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.932e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 11:21:58,735 (trainer:732) INFO: 53epoch:train:3901-4000batch: iter_time=1.312e-04, forward_time=0.147, loss_ctc=63.471, loss_att=47.210, acc=0.729, loss=52.089, backward_time=1.030, grad_norm=141.710, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.932e-05, train_time=2.810
+[gpub001:0/64] 2023-07-15 11:24:16,739 (trainer:732) INFO: 53epoch:train:4001-4100batch: iter_time=1.362e-04, forward_time=0.148, loss_ctc=71.783, loss_att=52.931, acc=0.721, loss=58.586, backward_time=1.028, grad_norm=117.704, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.760
+[gpub001:0/64] 2023-07-15 11:25:58,082 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 11:26:16,209 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 11:26:19,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:31:12,862 (trainer:732) INFO: 53epoch:train:4101-4200batch: iter_time=1.396, forward_time=0.174, loss_ctc=67.126, loss_att=50.278, acc=0.730, loss=55.332, backward_time=1.038, grad_norm=122.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.931e-05, train_time=8.322
+[gpub001:0/64] 2023-07-15 11:33:32,008 (trainer:732) INFO: 53epoch:train:4201-4300batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=72.073, loss_att=52.426, acc=0.717, loss=58.320, backward_time=1.031, grad_norm=122.383, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.783
+[gpub001:0/64] 2023-07-15 11:35:47,563 (trainer:732) INFO: 53epoch:train:4301-4400batch: iter_time=9.438e-05, forward_time=0.144, loss_ctc=75.067, loss_att=52.917, acc=0.710, loss=59.562, backward_time=1.028, grad_norm=140.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:38:03,264 (trainer:732) INFO: 53epoch:train:4401-4500batch: iter_time=1.051e-04, forward_time=0.146, loss_ctc=63.278, loss_att=44.313, acc=0.735, loss=50.003, backward_time=1.027, grad_norm=155.846, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 11:40:20,170 (trainer:732) INFO: 53epoch:train:4501-4600batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=75.680, loss_att=58.305, acc=0.715, loss=63.517, backward_time=1.032, grad_norm=150.291, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 11:42:36,442 (trainer:732) INFO: 53epoch:train:4601-4700batch: iter_time=1.032e-04, forward_time=0.144, loss_ctc=65.197, loss_att=48.186, acc=0.727, loss=53.289, backward_time=1.027, grad_norm=138.175, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 11:44:54,859 (trainer:732) INFO: 53epoch:train:4701-4800batch: iter_time=9.797e-05, forward_time=0.145, loss_ctc=64.764, loss_att=45.735, acc=0.737, loss=51.444, backward_time=1.029, grad_norm=220.036, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:47:16,838 (trainer:732) INFO: 53epoch:train:4801-4900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.186, loss_att=49.914, acc=0.714, loss=55.995, backward_time=1.034, grad_norm=126.685, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 11:49:35,518 (trainer:732) INFO: 53epoch:train:4901-5000batch: iter_time=1.101e-04, forward_time=0.144, loss_ctc=63.751, loss_att=47.279, acc=0.720, loss=52.221, backward_time=1.036, grad_norm=128.634, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=2.773
+[gpub001:0/64] 2023-07-15 11:49:40,092 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 11:49:58,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 11:50:01,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:56:06,405 (trainer:732) INFO: 53epoch:train:5001-5100batch: iter_time=1.688, forward_time=0.158, loss_ctc=76.559, loss_att=57.521, acc=0.703, loss=63.233, backward_time=1.040, grad_norm=125.506, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=7.818
+[gpub001:0/64] 2023-07-15 11:58:22,014 (trainer:732) INFO: 53epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.719, loss_att=48.716, acc=0.715, loss=55.017, backward_time=1.026, grad_norm=133.259, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:00:37,642 (trainer:732) INFO: 53epoch:train:5201-5300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=66.930, loss_att=46.500, acc=0.734, loss=52.629, backward_time=1.027, grad_norm=125.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:02:53,478 (trainer:732) INFO: 53epoch:train:5301-5400batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=73.301, loss_att=55.694, acc=0.715, loss=60.976, backward_time=1.028, grad_norm=137.805, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 12:05:09,897 (trainer:732) INFO: 53epoch:train:5401-5500batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=67.056, loss_att=49.311, acc=0.723, loss=54.635, backward_time=1.028, grad_norm=120.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.728
+[gpub001:0/64] 2023-07-15 12:07:26,024 (trainer:732) INFO: 53epoch:train:5501-5600batch: iter_time=1.192e-04, forward_time=0.147, loss_ctc=63.690, loss_att=46.252, acc=0.732, loss=51.484, backward_time=1.029, grad_norm=139.681, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 12:09:41,948 (trainer:732) INFO: 53epoch:train:5601-5700batch: iter_time=1.552e-04, forward_time=0.147, loss_ctc=70.104, loss_att=52.001, acc=0.712, loss=57.432, backward_time=1.028, grad_norm=107.690, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:11:58,251 (trainer:732) INFO: 53epoch:train:5701-5800batch: iter_time=1.338e-04, forward_time=0.148, loss_ctc=63.833, loss_att=47.985, acc=0.723, loss=52.739, backward_time=1.029, grad_norm=129.181, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:12:53,156 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 12:13:10,963 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 12:13:14,439 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:18:21,859 (trainer:732) INFO: 53epoch:train:5801-5900batch: iter_time=1.831, forward_time=0.146, loss_ctc=71.893, loss_att=49.578, acc=0.732, loss=56.273, backward_time=1.038, grad_norm=140.020, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=7.672
+[gpub001:0/64] 2023-07-15 12:20:39,651 (trainer:732) INFO: 53epoch:train:5901-6000batch: iter_time=1.334e-04, forward_time=0.153, loss_ctc=77.718, loss_att=60.620, acc=0.708, loss=65.749, backward_time=1.031, grad_norm=115.613, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.756
+[gpub001:0/64] 2023-07-15 12:22:55,460 (trainer:732) INFO: 53epoch:train:6001-6100batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=66.395, loss_att=47.215, acc=0.723, loss=52.969, backward_time=1.028, grad_norm=119.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:25:14,502 (trainer:732) INFO: 53epoch:train:6101-6200batch: iter_time=0.003, forward_time=0.146, loss_ctc=73.384, loss_att=54.398, acc=0.735, loss=60.094, backward_time=1.038, grad_norm=134.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.781
+[gpub001:0/64] 2023-07-15 12:27:31,260 (trainer:732) INFO: 53epoch:train:6201-6300batch: iter_time=1.497e-04, forward_time=0.148, loss_ctc=68.480, loss_att=49.380, acc=0.721, loss=55.110, backward_time=1.029, grad_norm=133.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 12:29:47,555 (trainer:732) INFO: 53epoch:train:6301-6400batch: iter_time=9.751e-05, forward_time=0.147, loss_ctc=64.980, loss_att=48.650, acc=0.736, loss=53.549, backward_time=1.029, grad_norm=109.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.920e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:32:03,537 (trainer:732) INFO: 53epoch:train:6401-6500batch: iter_time=9.952e-05, forward_time=0.147, loss_ctc=64.249, loss_att=47.186, acc=0.732, loss=52.305, backward_time=1.029, grad_norm=131.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.920e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 12:34:25,293 (trainer:732) INFO: 53epoch:train:6501-6600batch: iter_time=6.416e-04, forward_time=0.158, loss_ctc=70.757, loss_att=53.176, acc=0.719, loss=58.450, backward_time=1.030, grad_norm=142.896, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.920e-05, train_time=2.835
+[gpub001:0/64] 2023-07-15 12:36:07,690 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 12:36:25,678 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 12:36:29,164 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:42:08,942 (trainer:732) INFO: 53epoch:train:6601-6700batch: iter_time=1.626, forward_time=0.195, loss_ctc=66.551, loss_att=48.083, acc=0.738, loss=53.624, backward_time=1.040, grad_norm=118.665, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.919e-05, train_time=9.271
+[gpub001:0/64] 2023-07-15 12:44:26,138 (trainer:732) INFO: 53epoch:train:6701-6800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=72.189, loss_att=55.718, acc=0.714, loss=60.660, backward_time=1.030, grad_norm=123.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.919e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 12:46:42,970 (trainer:732) INFO: 53epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=74.928, loss_att=53.819, acc=0.718, loss=60.152, backward_time=1.032, grad_norm=125.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 12:48:58,893 (trainer:732) INFO: 53epoch:train:6901-7000batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=63.121, loss_att=45.095, acc=0.738, loss=50.503, backward_time=1.027, grad_norm=109.171, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:51:14,725 (trainer:732) INFO: 53epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=77.407, loss_att=58.128, acc=0.716, loss=63.911, backward_time=1.029, grad_norm=119.319, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:53:30,552 (trainer:732) INFO: 53epoch:train:7101-7200batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=64.425, loss_att=47.865, acc=0.737, loss=52.833, backward_time=1.028, grad_norm=136.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:55:46,313 (trainer:732) INFO: 53epoch:train:7201-7300batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=64.912, loss_att=45.342, acc=0.743, loss=51.213, backward_time=1.029, grad_norm=110.998, clip=100.000, loss_scale=2.499e+32, optim_step_time=0.182, optim0_lr0=4.916e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 12:58:02,139 (trainer:732) INFO: 53epoch:train:7301-7400batch: iter_time=1.144e-04, forward_time=0.147, loss_ctc=70.223, loss_att=50.001, acc=0.723, loss=56.068, backward_time=1.028, grad_norm=128.640, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.916e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 13:00:18,214 (trainer:732) INFO: 53epoch:train:7401-7500batch: iter_time=1.159e-04, forward_time=0.148, loss_ctc=62.811, loss_att=47.105, acc=0.732, loss=51.817, backward_time=1.030, grad_norm=123.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.915e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 13:00:22,641 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
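Note: every iter-factory build logs the same batch-sampler summary (N-batch=37994, batch_size=128, mean=128.0, min=128, max=129). That shape is what fixed-size batching produces when keys are taken in key-file order and the leftover keys, instead of forming a short final batch, are folded into existing batches. A sketch under that assumption (illustrative, not the UnsortedBatchSampler source):

    # Cut keys into fixed-size batches; spread the remainder over existing
    # batches, so a few batches hold 129 keys (min=128, max=129, mean~128.0).
    def make_batches(keys, batch_size=128):
        n_batch = len(keys) // batch_size
        batches = [keys[i * batch_size:(i + 1) * batch_size] for i in range(n_batch)]
        for j, key in enumerate(keys[n_batch * batch_size:]):
            batches[j % n_batch].append(key)
        return batches

    sizes = [len(b) for b in make_batches(list(range(37994 * 128 + 50)))]
    assert (len(sizes), min(sizes), max(sizes)) == (37994, 128, 129)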
+[gpub001:0/64] 2023-07-15 13:00:40,613 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 13:00:44,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:06:53,659 (trainer:732) INFO: 53epoch:train:7501-7600batch: iter_time=1.682, forward_time=0.153, loss_ctc=71.689, loss_att=50.993, acc=0.731, loss=57.202, backward_time=1.056, grad_norm=110.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.915e-05, train_time=7.909
+[gpub001:0/64] 2023-07-15 13:09:10,347 (trainer:732) INFO: 53epoch:train:7601-7700batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=75.829, loss_att=57.328, acc=0.712, loss=62.878, backward_time=1.028, grad_norm=137.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 13:11:26,829 (trainer:732) INFO: 53epoch:train:7701-7800batch: iter_time=1.304e-04, forward_time=0.148, loss_ctc=65.217, loss_att=45.189, acc=0.736, loss=51.198, backward_time=1.029, grad_norm=122.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 13:13:42,554 (trainer:732) INFO: 53epoch:train:7801-7900batch: iter_time=1.287e-04, forward_time=0.146, loss_ctc=74.146, loss_att=54.219, acc=0.730, loss=60.197, backward_time=1.028, grad_norm=119.101, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:15:58,492 (trainer:732) INFO: 53epoch:train:7901-8000batch: iter_time=1.298e-04, forward_time=0.147, loss_ctc=64.820, loss_att=50.432, acc=0.736, loss=54.748, backward_time=1.029, grad_norm=136.726, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 13:18:14,196 (trainer:732) INFO: 53epoch:train:8001-8100batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=65.159, loss_att=46.648, acc=0.739, loss=52.201, backward_time=1.027, grad_norm=129.032, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:20:30,190 (trainer:732) INFO: 53epoch:train:8101-8200batch: iter_time=1.194e-04, forward_time=0.148, loss_ctc=67.450, loss_att=49.032, acc=0.722, loss=54.557, backward_time=1.029, grad_norm=149.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 13:22:47,766 (trainer:732) INFO: 53epoch:train:8201-8300batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=69.780, loss_att=53.305, acc=0.725, loss=58.248, backward_time=1.029, grad_norm=135.432, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.911e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 13:23:44,787 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 13:24:02,966 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 13:24:06,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:30:26,163 (trainer:732) INFO: 53epoch:train:8301-8400batch: iter_time=2.133, forward_time=0.174, loss_ctc=68.152, loss_att=51.251, acc=0.719, loss=56.321, backward_time=1.042, grad_norm=127.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.911e-05, train_time=9.168
+[gpub001:0/64] 2023-07-15 13:32:43,842 (trainer:732) INFO: 53epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=75.378, loss_att=53.951, acc=0.719, loss=60.379, backward_time=1.030, grad_norm=120.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 13:35:00,654 (trainer:732) INFO: 53epoch:train:8501-8600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=63.746, loss_att=45.123, acc=0.736, loss=50.710, backward_time=1.027, grad_norm=118.053, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 13:37:16,881 (trainer:732) INFO: 53epoch:train:8601-8700batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=72.822, loss_att=55.363, acc=0.722, loss=60.601, backward_time=1.029, grad_norm=139.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:39:32,720 (trainer:732) INFO: 53epoch:train:8701-8800batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=62.997, loss_att=46.111, acc=0.740, loss=51.177, backward_time=1.028, grad_norm=115.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 13:41:48,463 (trainer:732) INFO: 53epoch:train:8801-8900batch: iter_time=1.169e-04, forward_time=0.147, loss_ctc=66.723, loss_att=46.736, acc=0.734, loss=52.732, backward_time=1.028, grad_norm=102.729, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 13:44:04,390 (trainer:732) INFO: 53epoch:train:8901-9000batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=65.966, loss_att=49.188, acc=0.724, loss=54.222, backward_time=1.029, grad_norm=110.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 13:46:20,504 (trainer:732) INFO: 53epoch:train:9001-9100batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.534, loss_att=51.762, acc=0.723, loss=56.493, backward_time=1.030, grad_norm=127.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 13:47:59,767 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 13:48:18,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 13:48:21,462 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:52:48,491 (trainer:732) INFO: 53epoch:train:9101-9200batch: iter_time=1.496, forward_time=0.181, loss_ctc=70.548, loss_att=52.593, acc=0.730, loss=57.980, backward_time=1.037, grad_norm=137.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.907e-05, train_time=7.758
+[gpub001:0/64] 2023-07-15 13:55:08,582 (trainer:732) INFO: 53epoch:train:9201-9300batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=71.911, loss_att=54.358, acc=0.714, loss=59.624, backward_time=1.034, grad_norm=117.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.907e-05, train_time=2.803
+[gpub001:0/64] 2023-07-15 13:57:24,812 (trainer:732) INFO: 53epoch:train:9301-9400batch: iter_time=1.046e-04, forward_time=0.144, loss_ctc=75.774, loss_att=53.635, acc=0.711, loss=60.277, backward_time=1.026, grad_norm=116.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:59:47,479 (trainer:732) INFO: 53epoch:train:9401-9500batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=63.534, loss_att=44.227, acc=0.735, loss=50.019, backward_time=1.037, grad_norm=112.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:02:07,901 (trainer:732) INFO: 53epoch:train:9501-9600batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=75.989, loss_att=57.625, acc=0.716, loss=63.134, backward_time=1.035, grad_norm=128.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.808
+[gpub001:0/64] 2023-07-15 14:04:30,560 (trainer:732) INFO: 53epoch:train:9601-9700batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.849, loss_att=47.746, acc=0.733, loss=52.877, backward_time=1.032, grad_norm=145.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:06:49,135 (trainer:732) INFO: 53epoch:train:9701-9800batch: iter_time=1.010e-04, forward_time=0.145, loss_ctc=64.302, loss_att=46.018, acc=0.737, loss=51.503, backward_time=1.028, grad_norm=115.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.771
+[gpub001:0/64] 2023-07-15 14:09:06,265 (trainer:732) INFO: 53epoch:train:9801-9900batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=70.320, loss_att=50.257, acc=0.719, loss=56.276, backward_time=1.032, grad_norm=123.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 14:11:24,322 (trainer:732) INFO: 53epoch:train:9901-10000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=62.892, loss_att=47.066, acc=0.722, loss=51.814, backward_time=1.028, grad_norm=133.856, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.903e-05, train_time=2.761
+[gpub001:0/64] 2023-07-15 14:25:24,779 (trainer:338) INFO: 53epoch results: [train] iter_time=0.246, forward_time=0.155, loss_ctc=69.208, loss_att=50.743, acc=0.723, loss=56.282, backward_time=1.033, grad_norm=130.642, clip=100.000, loss_scale=2.290e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=3.453, time=4 hours, 48 minutes and 11.54 seconds, total_count=500000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.074, cer_ctc=0.248, loss_att=38.316, acc=0.682, cer=0.391, wer=0.994, loss=39.744, time=7 minutes and 22.59 seconds, total_count=51106, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 14.16 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 14:25:40,654 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/48epoch.pth
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:272) INFO: 54/60epoch started. Estimated time to finish: 1 day, 10 hours and 52 minutes
+[gpub001:0/64] 2023-07-15 14:25:40,788 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
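Note: the logged optim0_lr0 decays smoothly across epochs (4.951e-05 at total_count=490000, 4.903e-05 at total_count=500000), which is consistent with the inverse-square-root warmup schedule suggested by "lr2.5e-4_warmup10k" in the experiment name: well past warmup, lr is roughly k / sqrt(step). A quick consistency check on those two epoch-boundary points:

    import math
    # (total_count, optim0_lr0) at the 52epoch and 53epoch boundaries
    points = [(490000, 4.951e-05), (500000, 4.903e-05)]
    k = [lr * math.sqrt(step) for step, lr in points]
    print(k)  # both ~0.0347, i.e. lr tracks step**-0.5 to within ~0.04%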
+[gpub001:0/64] 2023-07-15 14:25:59,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 14:26:03,051 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:32:22,669 (trainer:732) INFO: 54epoch:train:1-100batch: iter_time=2.563, forward_time=0.175, loss_ctc=63.502, loss_att=48.578, acc=0.708, loss=53.055, backward_time=1.049, grad_norm=140.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=4.903e-05, train_time=8.037
+[gpub001:0/64] 2023-07-15 14:34:38,802 (trainer:732) INFO: 54epoch:train:101-200batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.392, loss_att=47.095, acc=0.700, loss=51.684, backward_time=1.029, grad_norm=120.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 14:36:54,626 (trainer:732) INFO: 54epoch:train:201-300batch: iter_time=9.869e-05, forward_time=0.145, loss_ctc=72.046, loss_att=53.141, acc=0.703, loss=58.812, backward_time=1.028, grad_norm=147.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 14:39:10,400 (trainer:732) INFO: 54epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.144, loss_ctc=68.492, loss_att=54.163, acc=0.695, loss=58.462, backward_time=1.028, grad_norm=132.331, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 14:41:40,557 (trainer:732) INFO: 54epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=74.081, loss_att=54.631, acc=0.712, loss=60.466, backward_time=1.039, grad_norm=142.817, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=3.003
+[gpub001:0/64] 2023-07-15 14:44:08,475 (trainer:732) INFO: 54epoch:train:501-600batch: iter_time=1.074e-04, forward_time=0.146, loss_ctc=75.789, loss_att=56.471, acc=0.714, loss=62.266, backward_time=1.041, grad_norm=129.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=2.958
+[gpub001:0/64] 2023-07-15 14:46:25,669 (trainer:732) INFO: 54epoch:train:601-700batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=80.391, loss_att=63.078, acc=0.698, loss=68.272, backward_time=1.030, grad_norm=144.568, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.744
+[gpub001:0/64] 2023-07-15 14:48:45,282 (trainer:732) INFO: 54epoch:train:701-800batch: iter_time=9.712e-05, forward_time=0.144, loss_ctc=76.898, loss_att=54.873, acc=0.711, loss=61.480, backward_time=1.034, grad_norm=129.027, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.792
+[gpub001:0/64] 2023-07-15 14:49:40,812 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 14:49:58,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 14:50:02,155 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:55:15,387 (trainer:732) INFO: 54epoch:train:801-900batch: iter_time=1.314, forward_time=0.146, loss_ctc=67.481, loss_att=53.521, acc=0.714, loss=57.709, backward_time=1.050, grad_norm=123.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=7.802
+[gpub001:0/64] 2023-07-15 14:57:33,121 (trainer:732) INFO: 54epoch:train:901-1000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=61.392, loss_att=45.351, acc=0.703, loss=50.163, backward_time=1.028, grad_norm=131.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=2.754
+[gpub001:0/64] 2023-07-15 14:59:32,594 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
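Note: the "grad norm is nan" warnings are the visible half of mixed-precision dynamic loss scaling: when the scaled gradients overflow, the optimizer update is skipped and the loss scale is cut, then grows back after a stretch of finite steps. That matches how loss_scale moves here (the per-window average drops from 3.245e+32 toward 1.623e+32 after this skip, and to 8.113e+31 after the next one, having doubled its way up earlier). A generic sketch of that loop (ESPnet delegates this to torch.cuda.amp.GradScaler; the class below is illustrative only):

    # Illustrative dynamic loss scaling: halve the scale on overflow and skip
    # the step; double it again after a run of overflow-free steps.
    class LossScaler:
        def __init__(self, scale=2.0 ** 15, growth_interval=2000):
            self.scale = scale
            self.growth_interval = growth_interval
            self._good_steps = 0

        def step(self, grads_are_finite: bool) -> bool:
            if not grads_are_finite:
                self.scale /= 2.0          # e.g. 3.245e+32 -> 1.623e+32
                self._good_steps = 0
                return False               # optimizer update is skipped
            self._good_steps += 1
            if self._good_steps >= self.growth_interval:
                self.scale *= 2.0
                self._good_steps = 0
            return True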
+[gpub001:0/64] 2023-07-15 14:59:48,916 (trainer:732) INFO: 54epoch:train:1001-1100batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=65.249, loss_att=46.994, acc=0.724, loss=52.470, backward_time=1.028, grad_norm=120.404, clip=100.000, loss_scale=3.047e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 15:02:05,028 (trainer:732) INFO: 54epoch:train:1101-1200batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=68.386, loss_att=53.369, acc=0.710, loss=57.874, backward_time=1.030, grad_norm=121.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 15:04:21,637 (trainer:732) INFO: 54epoch:train:1201-1300batch: iter_time=1.120e-04, forward_time=0.148, loss_ctc=72.716, loss_att=55.502, acc=0.716, loss=60.666, backward_time=1.033, grad_norm=135.679, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 15:06:37,386 (trainer:732) INFO: 54epoch:train:1301-1400batch: iter_time=1.122e-04, forward_time=0.147, loss_ctc=70.008, loss_att=51.346, acc=0.719, loss=56.945, backward_time=1.029, grad_norm=128.324, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 15:08:54,748 (trainer:732) INFO: 54epoch:train:1401-1500batch: iter_time=1.246e-04, forward_time=0.149, loss_ctc=83.968, loss_att=69.519, acc=0.699, loss=73.854, backward_time=1.032, grad_norm=138.800, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 15:11:10,946 (trainer:732) INFO: 54epoch:train:1501-1600batch: iter_time=1.227e-04, forward_time=0.148, loss_ctc=72.417, loss_att=52.717, acc=0.726, loss=58.627, backward_time=1.031, grad_norm=125.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 15:12:42,398 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 15:13:00,574 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 15:13:04,265 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 15:17:27,833 (trainer:732) INFO: 54epoch:train:1601-1700batch: iter_time=1.373, forward_time=0.166, loss_ctc=71.162, loss_att=54.576, acc=0.714, loss=59.552, backward_time=1.038, grad_norm=130.731, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.895e-05, train_time=7.537
+[gpub001:0/64] 2023-07-15 15:19:44,876 (trainer:732) INFO: 54epoch:train:1701-1800batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=57.674, loss_att=42.891, acc=0.710, loss=47.326, backward_time=1.033, grad_norm=146.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.895e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 15:22:00,770 (trainer:732) INFO: 54epoch:train:1801-1900batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=69.982, loss_att=52.765, acc=0.709, loss=57.930, backward_time=1.029, grad_norm=110.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 15:24:16,473 (trainer:732) INFO: 54epoch:train:1901-2000batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=68.795, loss_att=50.053, acc=0.721, loss=55.676, backward_time=1.027, grad_norm=128.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 15:26:32,120 (trainer:732) INFO: 54epoch:train:2001-2100batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=71.032, loss_att=54.634, acc=0.702, loss=59.553, backward_time=1.026, grad_norm=146.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 15:28:47,771 (trainer:732) INFO: 54epoch:train:2101-2200batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=69.011, loss_att=51.902, acc=0.714, loss=57.035, backward_time=1.027, grad_norm=146.707, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 15:31:09,057 (trainer:732) INFO: 54epoch:train:2201-2300batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=77.368, loss_att=56.839, acc=0.720, loss=62.997, backward_time=1.037, grad_norm=146.981, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.826
+[gpub001:0/64] 2023-07-15 15:33:27,573 (trainer:732) INFO: 54epoch:train:2301-2400batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=72.933, loss_att=60.662, acc=0.700, loss=64.343, backward_time=1.031, grad_norm=140.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.892e-05, train_time=2.770
+[gpub001:0/64] 2023-07-15 15:36:01,415 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 15:36:19,553 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 15:36:23,193 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 15:36:23,194 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 15:36:23,200 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 15:39:55,199 (trainer:732) INFO: 54epoch:train:2401-2500batch: iter_time=2.445, forward_time=0.145, loss_ctc=74.369, loss_att=54.412, acc=0.716, loss=60.399, backward_time=1.038, grad_norm=132.718, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.892e-05, train_time=7.752
+[gpub001:0/64] 2023-07-15 15:42:12,710 (trainer:732) INFO: 54epoch:train:2501-2600batch: iter_time=1.524e-04, forward_time=0.147, loss_ctc=59.869, loss_att=46.039, acc=0.696, loss=50.188, backward_time=1.033, grad_norm=131.162, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 15:44:29,535 (trainer:732) INFO: 54epoch:train:2601-2700batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=65.703, loss_att=49.009, acc=0.712, loss=54.017, backward_time=1.025, grad_norm=128.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 15:46:45,197 (trainer:732) INFO: 54epoch:train:2701-2800batch: iter_time=1.495e-04, forward_time=0.147, loss_ctc=68.702, loss_att=53.287, acc=0.707, loss=57.911, backward_time=1.027, grad_norm=154.384, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 15:49:01,013 (trainer:732) INFO: 54epoch:train:2801-2900batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=73.798, loss_att=55.447, acc=0.706, loss=60.952, backward_time=1.029, grad_norm=127.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 15:49:11,705 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 15:51:16,520 (trainer:732) INFO: 54epoch:train:2901-3000batch: iter_time=1.349e-04, forward_time=0.146, loss_ctc=67.917, loss_att=49.080, acc=0.724, loss=54.731, backward_time=1.029, grad_norm=136.985, clip=100.000, loss_scale=8.610e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 15:53:32,664 (trainer:732) INFO: 54epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.148, loss_ctc=81.457, loss_att=61.670, acc=0.702, loss=67.606, backward_time=1.030, grad_norm=170.754, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 15:55:52,060 (trainer:732) INFO: 54epoch:train:3101-3200batch: iter_time=1.276e-04, forward_time=0.147, loss_ctc=74.030, loss_att=59.359, acc=0.705, loss=63.760, backward_time=1.030, grad_norm=138.690, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.788 +[gpub001:0/64] 2023-07-15 15:58:14,921 (trainer:732) INFO: 54epoch:train:3201-3300batch: iter_time=1.442e-04, forward_time=0.146, loss_ctc=71.516, loss_att=52.026, acc=0.713, loss=57.873, backward_time=1.035, grad_norm=134.585, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.857 +[gpub001:0/64] 2023-07-15 15:59:06,473 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 15:59:24,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 15:59:28,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 16:04:12,014 (trainer:732) INFO: 54epoch:train:3301-3400batch: iter_time=1.395, forward_time=0.180, loss_ctc=67.158, loss_att=49.393, acc=0.713, loss=54.722, backward_time=1.042, grad_norm=131.945, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=7.141 +[gpub001:0/64] 2023-07-15 16:06:28,728 (trainer:732) INFO: 54epoch:train:3401-3500batch: iter_time=9.250e-05, forward_time=0.146, loss_ctc=65.344, loss_att=46.346, acc=0.721, loss=52.046, backward_time=1.028, grad_norm=126.382, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 16:08:45,609 (trainer:732) INFO: 54epoch:train:3501-3600batch: iter_time=9.346e-05, forward_time=0.147, loss_ctc=69.324, loss_att=54.156, acc=0.716, loss=58.706, backward_time=1.030, grad_norm=127.329, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.737 +[gpub001:0/64] 2023-07-15 16:11:02,343 (trainer:732) INFO: 54epoch:train:3601-3700batch: iter_time=9.335e-05, forward_time=0.146, loss_ctc=68.249, loss_att=52.531, acc=0.713, loss=57.246, 
backward_time=1.034, grad_norm=131.595, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 16:13:18,306 (trainer:732) INFO: 54epoch:train:3701-3800batch: iter_time=9.645e-05, forward_time=0.146, loss_ctc=67.231, loss_att=51.222, acc=0.716, loss=56.025, backward_time=1.030, grad_norm=145.759, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 16:15:35,928 (trainer:732) INFO: 54epoch:train:3801-3900batch: iter_time=9.782e-05, forward_time=0.146, loss_ctc=76.405, loss_att=57.298, acc=0.718, loss=63.030, backward_time=1.030, grad_norm=118.733, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.752 +[gpub001:0/64] 2023-07-15 16:17:53,102 (trainer:732) INFO: 54epoch:train:3901-4000batch: iter_time=9.738e-05, forward_time=0.146, loss_ctc=77.861, loss_att=59.602, acc=0.715, loss=65.080, backward_time=1.032, grad_norm=143.778, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.743 +[gpub001:0/64] 2023-07-15 16:20:12,772 (trainer:732) INFO: 54epoch:train:4001-4100batch: iter_time=9.171e-05, forward_time=0.146, loss_ctc=71.078, loss_att=52.212, acc=0.724, loss=57.872, backward_time=1.031, grad_norm=122.391, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.884e-05, train_time=2.793 +[gpub001:0/64] 2023-07-15 16:21:56,724 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 16:22:14,655 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 16:22:18,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 16:29:57,478 (trainer:732) INFO: 54epoch:train:4101-4200batch: iter_time=4.421, forward_time=0.186, loss_ctc=72.658, loss_att=54.839, acc=0.713, loss=60.185, backward_time=1.041, grad_norm=113.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.884e-05, train_time=11.694 +[gpub001:0/64] 2023-07-15 16:32:14,169 (trainer:732) INFO: 54epoch:train:4201-4300batch: iter_time=1.315e-04, forward_time=0.149, loss_ctc=62.009, loss_att=45.248, acc=0.711, loss=50.276, backward_time=1.028, grad_norm=132.170, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.883e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 16:34:31,503 (trainer:732) INFO: 54epoch:train:4301-4400batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.740, loss_att=51.311, acc=0.716, loss=56.540, backward_time=1.028, grad_norm=148.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=4.883e-05, train_time=2.746 +[gpub001:0/64] 2023-07-15 16:36:47,096 (trainer:732) 
INFO: 54epoch:train:4401-4500batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.509, loss_att=49.340, acc=0.715, loss=53.891, backward_time=1.026, grad_norm=142.094, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.882e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 16:39:14,472 (trainer:732) INFO: 54epoch:train:4501-4600batch: iter_time=5.487e-04, forward_time=0.188, loss_ctc=71.996, loss_att=52.884, acc=0.721, loss=58.617, backward_time=1.057, grad_norm=114.514, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.191, optim0_lr0=4.882e-05, train_time=2.945 +[gpub001:0/64] 2023-07-15 16:41:40,016 (trainer:732) INFO: 54epoch:train:4601-4700batch: iter_time=1.099e-04, forward_time=0.216, loss_ctc=69.267, loss_att=53.635, acc=0.719, loss=58.325, backward_time=1.040, grad_norm=162.003, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.881e-05, train_time=2.913 +[gpub001:0/64] 2023-07-15 16:43:56,617 (trainer:732) INFO: 54epoch:train:4701-4800batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=79.072, loss_att=61.795, acc=0.708, loss=66.978, backward_time=1.032, grad_norm=130.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.881e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 16:46:13,780 (trainer:732) INFO: 54epoch:train:4801-4900batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=75.323, loss_att=51.827, acc=0.719, loss=58.876, backward_time=1.030, grad_norm=137.087, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.743 +[gpub001:0/64] 2023-07-15 16:48:31,776 (trainer:732) INFO: 54epoch:train:4901-5000batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=74.927, loss_att=57.150, acc=0.708, loss=62.483, backward_time=1.033, grad_norm=132.363, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.760 +[gpub001:0/64] 2023-07-15 16:48:53,054 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/64] 2023-07-15 16:49:11,204 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 16:49:14,723 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 16:55:54,724 (trainer:732) INFO: 54epoch:train:5001-5100batch: iter_time=2.962, forward_time=0.147, loss_ctc=58.843, loss_att=46.145, acc=0.708, loss=49.955, backward_time=1.045, grad_norm=115.373, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=8.859 +[gpub001:0/64] 2023-07-15 16:58:11,635 (trainer:732) INFO: 54epoch:train:5101-5200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=66.317, loss_att=46.348, acc=0.724, loss=52.339, backward_time=1.031, grad_norm=131.819, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 17:00:27,591 (trainer:732) INFO: 54epoch:train:5201-5300batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=67.278, loss_att=51.549, acc=0.714, loss=56.268, backward_time=1.029, grad_norm=135.720, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 17:02:43,684 (trainer:732) INFO: 54epoch:train:5301-5400batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=73.708, loss_att=57.069, acc=0.716, loss=62.061, backward_time=1.030, grad_norm=153.196, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 17:05:10,383 (trainer:732) INFO: 54epoch:train:5401-5500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=68.066, loss_att=48.978, acc=0.724, loss=54.704, backward_time=1.038, grad_norm=142.518, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.934 +[gpub001:0/64] 2023-07-15 17:07:31,542 (trainer:732) INFO: 54epoch:train:5501-5600batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=79.121, loss_att=60.754, acc=0.714, loss=66.264, backward_time=1.037, grad_norm=156.564, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.823 +[gpub001:0/64] 2023-07-15 17:09:48,129 (trainer:732) INFO: 54epoch:train:5601-5700batch: iter_time=1.147e-04, forward_time=0.148, loss_ctc=71.339, loss_att=55.929, acc=0.723, loss=60.552, backward_time=1.033, grad_norm=154.635, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 17:12:04,010 (trainer:732) INFO: 54epoch:train:5701-5800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.126, loss_att=54.273, acc=0.721, loss=59.629, backward_time=1.028, grad_norm=132.905, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, 
optim0_lr0=4.876e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 17:12:54,371 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 17:13:12,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-15 17:13:15,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 17:18:33,665 (trainer:732) INFO: 54epoch:train:5801-5900batch: iter_time=1.397, forward_time=0.233, loss_ctc=70.879, loss_att=52.656, acc=0.723, loss=58.123, backward_time=1.073, grad_norm=140.446, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=4.876e-05, train_time=7.793 +[gpub001:0/64] 2023-07-15 17:21:02,892 (trainer:732) INFO: 54epoch:train:5901-6000batch: iter_time=1.398e-04, forward_time=0.165, loss_ctc=60.858, loss_att=44.904, acc=0.700, loss=49.690, backward_time=1.040, grad_norm=132.788, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.875e-05, train_time=2.984 +[gpub001:0/64] 2023-07-15 17:23:19,553 (trainer:732) INFO: 54epoch:train:6001-6100batch: iter_time=1.350e-04, forward_time=0.145, loss_ctc=64.750, loss_att=48.839, acc=0.720, loss=53.613, backward_time=1.028, grad_norm=136.365, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.875e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 17:25:35,434 (trainer:732) INFO: 54epoch:train:6101-6200batch: iter_time=1.440e-04, forward_time=0.147, loss_ctc=66.947, loss_att=50.736, acc=0.710, loss=55.599, backward_time=1.026, grad_norm=127.831, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=4.874e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 17:27:59,502 (trainer:732) INFO: 54epoch:train:6201-6300batch: iter_time=1.252e-04, forward_time=0.147, loss_ctc=71.889, loss_att=54.562, acc=0.718, loss=59.761, backward_time=1.036, grad_norm=116.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.881 +[gpub001:0/64] 2023-07-15 17:30:18,402 (trainer:732) INFO: 54epoch:train:6301-6400batch: iter_time=1.250e-04, forward_time=0.147, loss_ctc=66.848, loss_att=48.903, acc=0.725, loss=54.286, backward_time=1.032, grad_norm=118.817, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.778 +[gpub001:0/64] 2023-07-15 17:32:34,639 (trainer:732) INFO: 54epoch:train:6401-6500batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=79.035, loss_att=65.024, acc=0.701, loss=69.227, backward_time=1.030, grad_norm=146.629, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 17:34:53,111 (trainer:732) INFO: 54epoch:train:6501-6600batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=73.072, 
loss_att=54.719, acc=0.716, loss=60.225, backward_time=1.030, grad_norm=141.338, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.769 +[gpub001:0/64] 2023-07-15 17:36:40,428 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub001:0/64] 2023-07-15 17:36:58,440 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 17:37:01,910 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 17:37:01,911 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 17:37:01,917 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 17:41:31,451 (trainer:732) INFO: 54epoch:train:6601-6700batch: iter_time=1.482, forward_time=0.207, loss_ctc=70.480, loss_att=52.884, acc=0.716, loss=58.163, backward_time=1.044, grad_norm=119.526, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.872e-05, train_time=7.967 +[gpub001:0/64] 2023-07-15 17:43:48,493 (trainer:732) INFO: 54epoch:train:6701-6800batch: iter_time=1.090e-04, forward_time=0.148, loss_ctc=56.961, loss_att=42.537, acc=0.721, loss=46.864, backward_time=1.031, grad_norm=127.255, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.872e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 17:46:04,798 (trainer:732) INFO: 54epoch:train:6801-6900batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=66.029, loss_att=48.816, acc=0.717, loss=53.980, backward_time=1.029, grad_norm=110.226, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 17:48:20,547 (trainer:732) INFO: 54epoch:train:6901-7000batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=68.985, loss_att=52.724, acc=0.718, loss=57.602, backward_time=1.026, grad_norm=155.820, clip=100.000, loss_scale=1.558e+32, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 17:50:36,889 (trainer:732) INFO: 54epoch:train:7001-7100batch: iter_time=1.080e-04, forward_time=0.147, loss_ctc=69.839, loss_att=54.243, acc=0.714, loss=58.922, backward_time=1.029, grad_norm=144.642, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 17:52:52,346 (trainer:732) INFO: 54epoch:train:7101-7200batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.013, loss_att=51.423, acc=0.715, loss=56.100, backward_time=1.025, grad_norm=131.762, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 17:55:09,258 (trainer:732) INFO: 54epoch:train:7201-7300batch: iter_time=1.155e-04, forward_time=0.148, loss_ctc=73.813, loss_att=55.438, acc=0.729, loss=60.950, backward_time=1.031, grad_norm=134.728, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.738 +[gpub001:0/64] 
2023-07-15 17:57:25,341 (trainer:732) INFO: 54epoch:train:7301-7400batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=77.874, loss_att=60.697, acc=0.708, loss=65.850, backward_time=1.030, grad_norm=138.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 17:59:41,257 (trainer:732) INFO: 54epoch:train:7401-7500batch: iter_time=1.076e-04, forward_time=0.147, loss_ctc=72.240, loss_att=53.009, acc=0.727, loss=58.778, backward_time=1.029, grad_norm=151.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 17:59:45,954 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 18:00:04,194 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 18:00:07,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 18:05:23,755 (trainer:732) INFO: 54epoch:train:7501-7600batch: iter_time=1.578, forward_time=0.148, loss_ctc=63.070, loss_att=46.075, acc=0.730, loss=51.173, backward_time=1.046, grad_norm=104.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=6.850 +[gpub001:0/64] 2023-07-15 18:07:39,960 (trainer:732) INFO: 54epoch:train:7601-7700batch: iter_time=1.174e-04, forward_time=0.147, loss_ctc=61.589, loss_att=43.882, acc=0.723, loss=49.194, backward_time=1.027, grad_norm=131.469, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 18:09:56,626 (trainer:732) INFO: 54epoch:train:7701-7800batch: iter_time=1.127e-04, forward_time=0.149, loss_ctc=67.589, loss_att=49.630, acc=0.721, loss=55.018, backward_time=1.029, grad_norm=119.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 18:12:12,863 (trainer:732) INFO: 54epoch:train:7801-7900batch: iter_time=1.177e-04, forward_time=0.147, loss_ctc=68.700, loss_att=54.220, acc=0.708, loss=58.564, backward_time=1.028, grad_norm=137.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 18:14:41,639 (trainer:732) INFO: 54epoch:train:7901-8000batch: iter_time=5.809e-04, forward_time=0.239, loss_ctc=69.066, loss_att=52.174, acc=0.726, loss=57.241, backward_time=1.045, grad_norm=143.195, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.866e-05, train_time=2.975 +[gpub001:0/64] 2023-07-15 18:17:01,347 (trainer:732) INFO: 54epoch:train:8001-8100batch: iter_time=1.186e-04, forward_time=0.170, loss_ctc=74.493, loss_att=53.218, acc=0.731, loss=59.601, backward_time=1.032, 
grad_norm=137.223, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.866e-05, train_time=2.794 +[gpub001:0/64] 2023-07-15 18:19:39,609 (trainer:732) INFO: 54epoch:train:8101-8200batch: iter_time=1.190e-04, forward_time=0.154, loss_ctc=76.741, loss_att=61.285, acc=0.712, loss=65.922, backward_time=1.056, grad_norm=126.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=3.165 +[gpub001:0/64] 2023-07-15 18:21:56,191 (trainer:732) INFO: 54epoch:train:8201-8300batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=74.008, loss_att=52.223, acc=0.727, loss=58.758, backward_time=1.032, grad_norm=115.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=2.731 +[gpub001:0/64] 2023-07-15 18:23:02,635 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 18:23:20,841 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 18:23:24,303 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 18:29:23,956 (trainer:732) INFO: 54epoch:train:8301-8400batch: iter_time=2.345, forward_time=0.153, loss_ctc=64.437, loss_att=50.931, acc=0.730, loss=54.983, backward_time=1.080, grad_norm=113.050, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=8.955 +[gpub001:0/64] 2023-07-15 18:31:53,086 (trainer:732) INFO: 54epoch:train:8401-8500batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=60.709, loss_att=44.071, acc=0.715, loss=49.063, backward_time=1.058, grad_norm=119.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=2.982 +[gpub001:0/64] 2023-07-15 18:34:12,345 (trainer:732) INFO: 54epoch:train:8501-8600batch: iter_time=1.015e-04, forward_time=0.146, loss_ctc=63.847, loss_att=44.978, acc=0.734, loss=50.639, backward_time=1.044, grad_norm=106.758, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 18:36:33,395 (trainer:732) INFO: 54epoch:train:8601-8700batch: iter_time=1.047e-04, forward_time=0.145, loss_ctc=67.021, loss_att=51.807, acc=0.717, loss=56.371, backward_time=1.036, grad_norm=147.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.821 +[gpub001:0/64] 2023-07-15 18:38:52,658 (trainer:732) INFO: 54epoch:train:8701-8800batch: iter_time=9.446e-05, forward_time=0.146, loss_ctc=71.996, loss_att=54.637, acc=0.721, loss=59.845, backward_time=1.031, grad_norm=134.907, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 18:41:08,460 (trainer:732) INFO: 
54epoch:train:8801-8900batch: iter_time=9.579e-05, forward_time=0.147, loss_ctc=67.011, loss_att=49.373, acc=0.731, loss=54.664, backward_time=1.028, grad_norm=118.447, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 18:43:31,785 (trainer:732) INFO: 54epoch:train:8901-9000batch: iter_time=9.115e-05, forward_time=0.147, loss_ctc=80.349, loss_att=63.204, acc=0.712, loss=68.347, backward_time=1.040, grad_norm=116.323, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.866 +[gpub001:0/64] 2023-07-15 18:45:47,919 (trainer:732) INFO: 54epoch:train:9001-9100batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=72.525, loss_att=52.621, acc=0.728, loss=58.592, backward_time=1.031, grad_norm=131.623, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.861e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 18:47:20,788 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub001:0/64] 2023-07-15 18:47:38,830 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 18:47:42,522 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 18:51:38,331 (trainer:732) INFO: 54epoch:train:9101-9200batch: iter_time=1.531, forward_time=0.173, loss_ctc=69.490, loss_att=52.765, acc=0.723, loss=57.782, backward_time=1.037, grad_norm=124.708, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.861e-05, train_time=7.008 +[gpub001:0/64] 2023-07-15 18:53:54,968 (trainer:732) INFO: 54epoch:train:9201-9300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=56.277, loss_att=41.943, acc=0.713, loss=46.244, backward_time=1.031, grad_norm=107.239, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 18:56:11,597 (trainer:732) INFO: 54epoch:train:9301-9400batch: iter_time=1.083e-04, forward_time=0.147, loss_ctc=66.657, loss_att=51.226, acc=0.713, loss=55.856, backward_time=1.029, grad_norm=120.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 18:58:28,213 (trainer:732) INFO: 54epoch:train:9401-9500batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=68.048, loss_att=49.821, acc=0.723, loss=55.289, backward_time=1.032, grad_norm=148.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.732 +[gpub001:0/64] 2023-07-15 19:00:44,029 (trainer:732) INFO: 54epoch:train:9501-9600batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=70.255, loss_att=54.181, acc=0.708, loss=59.003, backward_time=1.028, grad_norm=132.345, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 19:03:00,419 (trainer:732) INFO: 54epoch:train:9601-9700batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=66.069, loss_att=50.762, acc=0.716, loss=55.354, backward_time=1.028, grad_norm=135.698, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.728 +[gpub001:0/64] 2023-07-15 19:05:16,211 (trainer:732) INFO: 54epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=76.996, loss_att=58.173, acc=0.719, loss=63.820, backward_time=1.029, grad_norm=132.925, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 19:07:32,296 (trainer:732) INFO: 54epoch:train:9801-9900batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=71.876, loss_att=58.022, acc=0.710, loss=62.178, backward_time=1.030, grad_norm=155.420, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 19:09:47,953 (trainer:732) INFO: 54epoch:train:9901-10000batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.341, loss_att=53.585, acc=0.723, loss=59.212, backward_time=1.026, grad_norm=135.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 19:23:03,469 (trainer:338) INFO: 54epoch results: [train] iter_time=0.248, forward_time=0.152, loss_ctc=69.871, loss_att=52.737, acc=0.715, loss=57.878, backward_time=1.034, grad_norm=133.170, clip=100.000, loss_scale=1.474e+32, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=3.409, time=4 hours, 44 minutes and 19.81 seconds, total_count=510000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=41.546, cer_ctc=0.245, loss_att=36.383, acc=0.679, cer=0.423, wer=1.000, loss=37.932, time=7 minutes and 4.7 seconds, total_count=52118, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 58.2 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-15 19:23:19,345 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/49epoch.pth +[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:272) INFO: 55/60epoch started. Estimated time to finish: 1 day, 5 hours and 52 minutes +[gpub001:0/64] 2023-07-15 19:23:19,377 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/64] 2023-07-15 19:23:37,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 19:23:40,341 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 19:31:02,897 (trainer:732) INFO: 55epoch:train:1-100batch: iter_time=3.212, forward_time=0.179, loss_ctc=66.161, loss_att=47.370, acc=0.712, loss=53.007, backward_time=1.042, grad_norm=114.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.856e-05, train_time=9.270 +[gpub001:0/64] 2023-07-15 19:33:19,196 (trainer:732) INFO: 55epoch:train:101-200batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=80.026, loss_att=57.239, acc=0.710, loss=64.075, backward_time=1.030, grad_norm=155.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.856e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 19:35:36,675 (trainer:732) INFO: 55epoch:train:201-300batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=70.409, loss_att=49.481, acc=0.714, loss=55.759, backward_time=1.028, grad_norm=126.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.856e-05, train_time=2.749 +[gpub001:0/64] 2023-07-15 19:37:52,657 (trainer:732) INFO: 55epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=73.324, loss_att=56.085, acc=0.696, loss=61.256, backward_time=1.028, grad_norm=136.348, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 19:40:11,554 (trainer:732) INFO: 55epoch:train:401-500batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=68.927, loss_att=51.897, acc=0.701, loss=57.006, backward_time=1.027, grad_norm=132.079, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.778 +[gpub001:0/64] 2023-07-15 19:42:27,271 (trainer:732) INFO: 55epoch:train:501-600batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=70.748, loss_att=54.161, acc=0.713, loss=59.137, backward_time=1.026, grad_norm=129.751, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 19:44:43,089 (trainer:732) INFO: 55epoch:train:601-700batch: iter_time=1.243e-04, forward_time=0.146, loss_ctc=70.260, loss_att=56.700, acc=0.709, loss=60.768, backward_time=1.027, grad_norm=134.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 19:47:02,647 (trainer:732) INFO: 55epoch:train:701-800batch: iter_time=1.169e-04, forward_time=0.159, loss_ctc=61.315, loss_att=47.512, acc=0.708, loss=51.653, backward_time=1.032, grad_norm=119.787, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.853e-05, 
train_time=2.791 +[gpub001:0/64] 2023-07-15 19:47:56,741 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub001:0/64] 2023-07-15 19:48:14,565 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 19:48:17,934 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 19:52:24,523 (trainer:732) INFO: 55epoch:train:801-900batch: iter_time=1.522, forward_time=0.203, loss_ctc=69.155, loss_att=52.160, acc=0.709, loss=57.258, backward_time=1.045, grad_norm=146.382, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.853e-05, train_time=6.437 +[gpub001:0/64] 2023-07-15 19:54:41,572 (trainer:732) INFO: 55epoch:train:901-1000batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=70.035, loss_att=56.263, acc=0.706, loss=60.395, backward_time=1.032, grad_norm=129.876, clip=100.000, loss_scale=3.115e+32, optim_step_time=0.182, optim0_lr0=4.852e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 19:56:57,064 (trainer:732) INFO: 55epoch:train:1001-1100batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=78.100, loss_att=53.099, acc=0.718, loss=60.599, backward_time=1.025, grad_norm=139.253, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.852e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 19:59:12,943 (trainer:732) INFO: 55epoch:train:1101-1200batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=74.777, loss_att=55.606, acc=0.702, loss=61.357, backward_time=1.027, grad_norm=126.416, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.851e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 20:01:28,825 (trainer:732) INFO: 55epoch:train:1201-1300batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.023, loss_att=53.038, acc=0.705, loss=57.833, backward_time=1.029, grad_norm=128.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 20:03:45,287 (trainer:732) INFO: 55epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.147, loss_ctc=65.828, loss_att=47.995, acc=0.715, loss=53.345, backward_time=1.029, grad_norm=139.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.729 +[gpub001:0/64] 2023-07-15 20:04:15,026 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 20:06:00,814 (trainer:732) INFO: 55epoch:train:1401-1500batch: iter_time=8.046e-04, forward_time=0.145, loss_ctc=70.081, loss_att=54.160, acc=0.716, loss=58.936, backward_time=1.028, grad_norm=161.967, clip=100.000, loss_scale=1.954e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 20:08:16,706 (trainer:732) INFO: 55epoch:train:1501-1600batch: iter_time=1.300e-04, forward_time=0.148, loss_ctc=65.333, loss_att=50.645, acc=0.717, loss=55.052, backward_time=1.028, grad_norm=125.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 20:09:58,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-15 20:10:16,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 20:10:20,108 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 20:15:05,333 (trainer:732) INFO: 55epoch:train:1601-1700batch: iter_time=2.640, forward_time=0.161, loss_ctc=69.772, loss_att=56.145, acc=0.702, loss=60.233, backward_time=1.045, grad_norm=123.498, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.849e-05, train_time=8.172 +[gpub001:0/64] 2023-07-15 20:17:22,257 (trainer:732) INFO: 55epoch:train:1701-1800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=71.715, loss_att=51.174, acc=0.722, loss=57.336, backward_time=1.032, grad_norm=144.148, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.849e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 20:19:38,257 (trainer:732) INFO: 55epoch:train:1801-1900batch: iter_time=1.163e-04, forward_time=0.146, loss_ctc=71.559, loss_att=54.084, acc=0.725, loss=59.327, backward_time=1.029, grad_norm=122.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 20:21:54,506 (trainer:732) INFO: 55epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=72.459, loss_att=49.201, acc=0.735, loss=56.178, backward_time=1.029, grad_norm=122.229, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 20:24:10,321 (trainer:732) INFO: 55epoch:train:2001-2100batch: iter_time=1.121e-04, forward_time=0.146, loss_ctc=72.859, loss_att=55.435, acc=0.707, loss=60.662, backward_time=1.027, grad_norm=135.154, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 20:26:25,933 (trainer:732) INFO: 55epoch:train:2101-2200batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=69.278, loss_att=52.189, acc=0.717, loss=57.316, 
backward_time=1.026, grad_norm=183.531, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 20:28:41,637 (trainer:732) INFO: 55epoch:train:2201-2300batch: iter_time=1.061e-04, forward_time=0.146, loss_ctc=67.008, loss_att=48.170, acc=0.728, loss=53.821, backward_time=1.027, grad_norm=136.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 20:30:57,552 (trainer:732) INFO: 55epoch:train:2301-2400batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=69.173, loss_att=54.867, acc=0.725, loss=59.159, backward_time=1.028, grad_norm=129.937, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 20:33:13,592 (trainer:732) INFO: 55epoch:train:2401-2500batch: iter_time=1.065e-04, forward_time=0.146, loss_ctc=64.746, loss_att=49.739, acc=0.715, loss=54.241, backward_time=1.028, grad_norm=119.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 20:33:16,890 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 20:33:34,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 20:33:38,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 20:39:15,364 (trainer:732) INFO: 55epoch:train:2501-2600batch: iter_time=1.324, forward_time=0.155, loss_ctc=75.159, loss_att=52.705, acc=0.723, loss=59.441, backward_time=1.050, grad_norm=172.779, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=7.235 +[gpub001:0/64] 2023-07-15 20:41:32,183 (trainer:732) INFO: 55epoch:train:2601-2700batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=69.915, loss_att=53.588, acc=0.721, loss=58.486, backward_time=1.031, grad_norm=134.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 20:43:48,482 (trainer:732) INFO: 55epoch:train:2701-2800batch: iter_time=1.217e-04, forward_time=0.147, loss_ctc=76.857, loss_att=50.857, acc=0.733, loss=58.657, backward_time=1.031, grad_norm=125.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 20:46:04,499 (trainer:732) INFO: 55epoch:train:2801-2900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=70.084, loss_att=55.592, acc=0.713, loss=59.939, backward_time=1.030, grad_norm=156.149, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 20:48:20,501 (trainer:732) INFO: 
55epoch:train:2901-3000batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=65.972, loss_att=49.663, acc=0.717, loss=54.556, backward_time=1.030, grad_norm=162.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.843e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 20:50:45,633 (trainer:732) INFO: 55epoch:train:3001-3100batch: iter_time=1.104e-04, forward_time=0.194, loss_ctc=66.366, loss_att=46.152, acc=0.727, loss=52.216, backward_time=1.050, grad_norm=136.185, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.843e-05, train_time=2.901 +[gpub001:0/64] 2023-07-15 20:53:05,403 (trainer:732) INFO: 55epoch:train:3101-3200batch: iter_time=1.162e-04, forward_time=0.173, loss_ctc=72.329, loss_att=57.968, acc=0.722, loss=62.276, backward_time=1.030, grad_norm=129.959, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.842e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 20:55:21,845 (trainer:732) INFO: 55epoch:train:3201-3300batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=60.730, loss_att=48.110, acc=0.725, loss=51.896, backward_time=1.030, grad_norm=110.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.842e-05, train_time=2.729 +[gpub001:0/64] 2023-07-15 20:56:24,324 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 20:56:42,220 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 20:56:45,671 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 21:01:16,198 (trainer:732) INFO: 55epoch:train:3301-3400batch: iter_time=2.037, forward_time=0.159, loss_ctc=65.648, loss_att=46.081, acc=0.723, loss=51.951, backward_time=1.054, grad_norm=141.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.841e-05, train_time=7.087 +[gpub001:0/64] 2023-07-15 21:03:32,649 (trainer:732) INFO: 55epoch:train:3401-3500batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=68.666, loss_att=55.199, acc=0.722, loss=59.239, backward_time=1.030, grad_norm=148.789, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.729 +[gpub001:0/64] 2023-07-15 21:05:48,957 (trainer:732) INFO: 55epoch:train:3501-3600batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=75.185, loss_att=50.223, acc=0.733, loss=57.711, backward_time=1.031, grad_norm=142.225, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 21:08:05,090 (trainer:732) INFO: 55epoch:train:3601-3700batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=72.633, loss_att=54.354, acc=0.712, loss=59.838, backward_time=1.029, grad_norm=156.626, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 21:10:21,444 (trainer:732) INFO: 55epoch:train:3701-3800batch: iter_time=1.204e-04, forward_time=0.147, loss_ctc=69.561, loss_att=51.973, acc=0.725, loss=57.249, backward_time=1.030, grad_norm=129.593, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 21:12:37,572 (trainer:732) INFO: 55epoch:train:3801-3900batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=64.566, loss_att=47.497, acc=0.722, loss=52.618, backward_time=1.027, grad_norm=133.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 21:14:53,769 (trainer:732) INFO: 55epoch:train:3901-4000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=67.393, loss_att=53.835, acc=0.726, loss=57.902, backward_time=1.029, grad_norm=140.927, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 21:17:09,543 (trainer:732) INFO: 55epoch:train:4001-4100batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=65.438, loss_att=51.648, acc=0.716, loss=55.785, backward_time=1.027, grad_norm=112.494, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.838e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 21:18:46,330 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 21:19:04,591 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 21:19:08,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 21:23:46,402 (trainer:732) INFO: 55epoch:train:4101-4200batch: iter_time=1.571, forward_time=0.171, loss_ctc=69.400, loss_att=56.671, acc=0.707, loss=60.489, backward_time=1.040, grad_norm=139.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.838e-05, train_time=7.937 +[gpub001:0/64] 2023-07-15 21:26:03,353 (trainer:732) INFO: 55epoch:train:4201-4300batch: iter_time=1.202e-04, forward_time=0.146, loss_ctc=71.370, loss_att=51.067, acc=0.719, loss=57.158, backward_time=1.033, grad_norm=124.956, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.739 +[gpub001:0/64] 2023-07-15 21:28:19,368 (trainer:732) INFO: 55epoch:train:4301-4400batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.788, loss_att=55.523, acc=0.716, loss=60.102, backward_time=1.030, grad_norm=145.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 21:30:35,376 (trainer:732) INFO: 55epoch:train:4401-4500batch: iter_time=1.251e-04, 
forward_time=0.146, loss_ctc=73.170, loss_att=49.309, acc=0.727, loss=56.467, backward_time=1.030, grad_norm=177.689, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 21:32:57,674 (trainer:732) INFO: 55epoch:train:4501-4600batch: iter_time=1.231e-04, forward_time=0.170, loss_ctc=71.775, loss_att=55.086, acc=0.701, loss=60.093, backward_time=1.065, grad_norm=138.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.836e-05, train_time=2.845 +[gpub001:0/64] 2023-07-15 21:35:18,977 (trainer:732) INFO: 55epoch:train:4601-4700batch: iter_time=1.292e-04, forward_time=0.163, loss_ctc=68.115, loss_att=52.067, acc=0.705, loss=56.882, backward_time=1.030, grad_norm=146.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.826 +[gpub001:0/64] 2023-07-15 21:37:37,938 (trainer:732) INFO: 55epoch:train:4701-4800batch: iter_time=1.370e-04, forward_time=0.146, loss_ctc=67.563, loss_att=48.505, acc=0.725, loss=54.222, backward_time=1.041, grad_norm=114.887, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.779 +[gpub001:0/64] 2023-07-15 21:39:53,753 (trainer:732) INFO: 55epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.293, loss_att=55.110, acc=0.720, loss=59.365, backward_time=1.028, grad_norm=141.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 21:42:09,547 (trainer:732) INFO: 55epoch:train:4901-5000batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=62.874, loss_att=48.554, acc=0.716, loss=52.850, backward_time=1.029, grad_norm=127.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.834e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 21:42:13,217 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/64] 2023-07-15 21:42:31,219 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 21:42:34,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:48:26,273 (trainer:732) INFO: 55epoch:train:5001-5100batch: iter_time=1.355, forward_time=0.182, loss_ctc=63.212, loss_att=44.533, acc=0.732, loss=50.136, backward_time=1.043, grad_norm=148.605, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=7.534
+[gpub001:0/64] 2023-07-15 21:50:42,538 (trainer:732) INFO: 55epoch:train:5101-5200batch: iter_time=9.720e-05, forward_time=0.144, loss_ctc=75.917, loss_att=55.962, acc=0.723, loss=61.949, backward_time=1.030, grad_norm=125.059, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.833e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 21:52:58,854 (trainer:732) INFO: 55epoch:train:5201-5300batch: iter_time=8.952e-05, forward_time=0.144, loss_ctc=68.540, loss_att=46.952, acc=0.732, loss=53.429, backward_time=1.030, grad_norm=136.243, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.833e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 21:55:19,419 (trainer:732) INFO: 55epoch:train:5301-5400batch: iter_time=8.648e-05, forward_time=0.144, loss_ctc=72.817, loss_att=54.724, acc=0.714, loss=60.152, backward_time=1.034, grad_norm=142.694, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.811
+[gpub001:0/64] 2023-07-15 21:57:36,018 (trainer:732) INFO: 55epoch:train:5401-5500batch: iter_time=9.088e-05, forward_time=0.144, loss_ctc=68.000, loss_att=50.279, acc=0.722, loss=55.595, backward_time=1.030, grad_norm=120.000, clip=100.000, loss_scale=2.888e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 21:59:52,599 (trainer:732) INFO: 55epoch:train:5501-5600batch: iter_time=9.922e-05, forward_time=0.145, loss_ctc=70.562, loss_att=53.030, acc=0.727, loss=58.290, backward_time=1.031, grad_norm=151.714, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:02:08,924 (trainer:732) INFO: 55epoch:train:5601-5700batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=67.520, loss_att=56.170, acc=0.720, loss=59.575, backward_time=1.030, grad_norm=179.978, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 22:04:27,557 (trainer:732) INFO: 55epoch:train:5701-5800batch: iter_time=9.311e-05, forward_time=0.144, loss_ctc=60.189, loss_att=46.927, acc=0.714, loss=50.906, backward_time=1.031, grad_norm=118.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.831e-05, train_time=2.772
+[gpub001:0/64] 2023-07-15 22:05:31,121 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 22:05:48,967 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 22:05:52,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:12:10,087 (trainer:732) INFO: 55epoch:train:5801-5900batch: iter_time=3.061, forward_time=0.185, loss_ctc=75.455, loss_att=58.296, acc=0.718, loss=63.444, backward_time=1.105, grad_norm=123.117, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.830e-05, train_time=9.250
+[gpub001:0/64] 2023-07-15 22:14:27,175 (trainer:732) INFO: 55epoch:train:5901-6000batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=68.118, loss_att=46.658, acc=0.734, loss=53.096, backward_time=1.031, grad_norm=177.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.830e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 22:16:43,727 (trainer:732) INFO: 55epoch:train:6001-6100batch: iter_time=1.079e-04, forward_time=0.146, loss_ctc=79.566, loss_att=57.355, acc=0.725, loss=64.019, backward_time=1.031, grad_norm=166.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:18:59,637 (trainer:732) INFO: 55epoch:train:6101-6200batch: iter_time=1.009e-04, forward_time=0.144, loss_ctc=69.002, loss_att=52.731, acc=0.722, loss=57.612, backward_time=1.029, grad_norm=119.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 22:21:16,210 (trainer:732) INFO: 55epoch:train:6201-6300batch: iter_time=1.116e-04, forward_time=0.145, loss_ctc=68.288, loss_att=50.084, acc=0.721, loss=55.545, backward_time=1.030, grad_norm=139.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:23:44,539 (trainer:732) INFO: 55epoch:train:6301-6400batch: iter_time=1.056e-04, forward_time=0.144, loss_ctc=67.344, loss_att=48.425, acc=0.728, loss=54.101, backward_time=1.045, grad_norm=133.354, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.966
+[gpub001:0/64] 2023-07-15 22:26:07,503 (trainer:732) INFO: 55epoch:train:6401-6500batch: iter_time=1.176e-04, forward_time=0.152, loss_ctc=72.129, loss_att=56.674, acc=0.724, loss=61.311, backward_time=1.051, grad_norm=128.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.827e-05, train_time=2.859
+[gpub001:0/64] 2023-07-15 22:28:29,981 (trainer:732) INFO: 55epoch:train:6501-6600batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=62.301, loss_att=49.950, acc=0.726, loss=53.655, backward_time=1.040, grad_norm=125.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.827e-05, train_time=2.849
+[gpub001:0/64] 2023-07-15 22:30:06,709 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 22:30:25,064 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 22:30:28,532 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:36:04,422 (trainer:732) INFO: 55epoch:train:6601-6700batch: iter_time=1.567, forward_time=0.148, loss_ctc=66.536, loss_att=49.542, acc=0.714, loss=54.640, backward_time=1.031, grad_norm=147.774, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.827e-05, train_time=9.089
+[gpub001:0/64] 2023-07-15 22:38:24,208 (trainer:732) INFO: 55epoch:train:6701-6800batch: iter_time=1.030e-04, forward_time=0.167, loss_ctc=70.272, loss_att=49.751, acc=0.724, loss=55.907, backward_time=1.037, grad_norm=112.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.826e-05, train_time=2.796
+[gpub001:0/64] 2023-07-15 22:40:41,823 (trainer:732) INFO: 55epoch:train:6801-6900batch: iter_time=9.641e-05, forward_time=0.146, loss_ctc=70.101, loss_att=53.067, acc=0.720, loss=58.177, backward_time=1.032, grad_norm=122.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.826e-05, train_time=2.752
+[gpub001:0/64] 2023-07-15 22:42:59,328 (trainer:732) INFO: 55epoch:train:6901-7000batch: iter_time=9.366e-05, forward_time=0.145, loss_ctc=72.478, loss_att=49.337, acc=0.730, loss=56.279, backward_time=1.029, grad_norm=129.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.825e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 22:45:16,265 (trainer:732) INFO: 55epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.021, loss_att=55.230, acc=0.700, loss=60.268, backward_time=1.029, grad_norm=126.971, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.825e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 22:47:32,473 (trainer:732) INFO: 55epoch:train:7101-7200batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=67.237, loss_att=51.993, acc=0.709, loss=56.566, backward_time=1.029, grad_norm=119.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 22:49:48,357 (trainer:732) INFO: 55epoch:train:7201-7300batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=67.345, loss_att=47.923, acc=0.726, loss=53.750, backward_time=1.029, grad_norm=118.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 22:52:04,142 (trainer:732) INFO: 55epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=68.462, loss_att=53.569, acc=0.725, loss=58.037, backward_time=1.028, grad_norm=138.314, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.823e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 22:54:24,008 (trainer:732) INFO: 55epoch:train:7401-7500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.726, loss_att=49.352, acc=0.717, loss=53.664, backward_time=1.035, grad_norm=118.056, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.823e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 22:54:29,095 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 22:54:47,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 22:54:50,606 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:59:53,940 (trainer:732) INFO: 55epoch:train:7501-7600batch: iter_time=1.342, forward_time=0.179, loss_ctc=75.400, loss_att=51.040, acc=0.730, loss=58.348, backward_time=1.045, grad_norm=136.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=6.598
+[gpub001:0/64] 2023-07-15 23:02:10,577 (trainer:732) INFO: 55epoch:train:7601-7700batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=69.202, loss_att=52.650, acc=0.727, loss=57.616, backward_time=1.028, grad_norm=137.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 23:04:27,339 (trainer:732) INFO: 55epoch:train:7701-7800batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=75.729, loss_att=50.295, acc=0.738, loss=57.925, backward_time=1.033, grad_norm=129.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:06:43,267 (trainer:732) INFO: 55epoch:train:7801-7900batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=68.754, loss_att=54.027, acc=0.720, loss=58.445, backward_time=1.030, grad_norm=130.834, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 23:09:14,681 (trainer:732) INFO: 55epoch:train:7901-8000batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=65.383, loss_att=49.653, acc=0.720, loss=54.372, backward_time=1.082, grad_norm=143.167, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=3.028
+[gpub001:0/64] 2023-07-15 23:11:49,953 (trainer:732) INFO: 55epoch:train:8001-8100batch: iter_time=1.179e-04, forward_time=0.148, loss_ctc=65.409, loss_att=44.547, acc=0.734, loss=50.806, backward_time=1.053, grad_norm=133.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=3.105
+[gpub001:0/64] 2023-07-15 23:14:06,705 (trainer:732) INFO: 55epoch:train:8101-8200batch: iter_time=1.099e-04, forward_time=0.148, loss_ctc=71.658, loss_att=56.942, acc=0.730, loss=61.357, backward_time=1.035, grad_norm=138.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:16:22,766 (trainer:732) INFO: 55epoch:train:8201-8300batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=60.074, loss_att=47.491, acc=0.727, loss=51.266, backward_time=1.030, grad_norm=119.828, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 23:17:21,807 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 23:17:40,310 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 23:17:44,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:22:06,581 (trainer:732) INFO: 55epoch:train:8301-8400batch: iter_time=1.931, forward_time=0.162, loss_ctc=70.513, loss_att=50.076, acc=0.728, loss=56.207, backward_time=1.053, grad_norm=120.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=6.876
+[gpub001:0/64] 2023-07-15 23:24:24,068 (trainer:732) INFO: 55epoch:train:8401-8500batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=66.708, loss_att=47.369, acc=0.724, loss=53.171, backward_time=1.031, grad_norm=116.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 23:26:42,851 (trainer:732) INFO: 55epoch:train:8501-8600batch: iter_time=1.086e-04, forward_time=0.167, loss_ctc=78.244, loss_att=58.054, acc=0.714, loss=64.111, backward_time=1.031, grad_norm=173.126, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.775
+[gpub001:0/64] 2023-07-15 23:29:02,728 (trainer:732) INFO: 55epoch:train:8601-8700batch: iter_time=1.328e-04, forward_time=0.155, loss_ctc=69.989, loss_att=54.707, acc=0.715, loss=59.291, backward_time=1.031, grad_norm=149.360, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.818e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 23:31:26,010 (trainer:732) INFO: 55epoch:train:8701-8800batch: iter_time=1.131e-04, forward_time=0.196, loss_ctc=67.734, loss_att=49.675, acc=0.708, loss=55.093, backward_time=1.032, grad_norm=151.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.199, optim0_lr0=4.817e-05, train_time=2.865
+[gpub001:0/64] 2023-07-15 23:33:45,012 (trainer:732) INFO: 55epoch:train:8801-8900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=66.717, loss_att=48.204, acc=0.719, loss=53.758, backward_time=1.032, grad_norm=131.081, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.817e-05, train_time=2.780
+[gpub001:0/64] 2023-07-15 23:36:00,960 (trainer:732) INFO: 55epoch:train:8901-9000batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=70.848, loss_att=55.620, acc=0.725, loss=60.188, backward_time=1.029, grad_norm=143.176, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.816e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 23:38:18,241 (trainer:732) INFO: 55epoch:train:9001-9100batch: iter_time=1.011e-04, forward_time=0.150, loss_ctc=62.371, loss_att=49.098, acc=0.726, loss=53.080, backward_time=1.032, grad_norm=147.200, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.816e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 23:40:07,256 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 23:40:25,764 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 23:40:29,564 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:45:21,086 (trainer:732) INFO: 55epoch:train:9101-9200batch: iter_time=2.793, forward_time=0.191, loss_ctc=66.256, loss_att=48.196, acc=0.723, loss=53.614, backward_time=1.044, grad_norm=112.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.815e-05, train_time=8.457
+[gpub001:0/64] 2023-07-15 23:47:43,332 (trainer:732) INFO: 55epoch:train:9201-9300batch: iter_time=9.859e-05, forward_time=0.145, loss_ctc=70.085, loss_att=49.743, acc=0.725, loss=55.846, backward_time=1.042, grad_norm=152.031, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.815e-05, train_time=2.845
+[gpub001:0/64] 2023-07-15 23:50:02,620 (trainer:732) INFO: 55epoch:train:9301-9400batch: iter_time=9.019e-05, forward_time=0.145, loss_ctc=69.933, loss_att=52.729, acc=0.721, loss=57.890, backward_time=1.040, grad_norm=139.705, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.786
+[gpub001:0/64] 2023-07-15 23:52:24,700 (trainer:732) INFO: 55epoch:train:9401-9500batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=72.083, loss_att=48.816, acc=0.733, loss=55.796, backward_time=1.032, grad_norm=144.424, clip=100.000, loss_scale=5.776e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.841
+[gpub001:0/64] 2023-07-15 23:54:49,710 (trainer:732) INFO: 55epoch:train:9501-9600batch: iter_time=1.283e-04, forward_time=0.196, loss_ctc=70.036, loss_att=53.272, acc=0.706, loss=58.301, backward_time=1.036, grad_norm=201.540, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=4.814e-05, train_time=2.899
+[gpub001:0/64] 2023-07-15 23:57:14,693 (trainer:732) INFO: 55epoch:train:9601-9700batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=67.979, loss_att=52.018, acc=0.708, loss=56.807, backward_time=1.042, grad_norm=195.141, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.813e-05, train_time=2.900
+[gpub001:0/64] 2023-07-15 23:59:36,152 (trainer:732) INFO: 55epoch:train:9701-9800batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=68.810, loss_att=49.128, acc=0.729, loss=55.032, backward_time=1.052, grad_norm=143.908, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.813e-05, train_time=2.829
+[gpub001:0/64] 2023-07-16 00:02:00,185 (trainer:732) INFO: 55epoch:train:9801-9900batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=66.852, loss_att=52.866, acc=0.723, loss=57.062, backward_time=1.040, grad_norm=130.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.881
+[gpub001:0/64] 2023-07-16 00:04:16,085 (trainer:732) INFO: 55epoch:train:9901-10000batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=62.767, loss_att=47.965, acc=0.721, loss=52.406, backward_time=1.029, grad_norm=113.224, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.718
+[gpub001:0/64] 2023-07-16 00:16:37,923 (trainer:338) INFO: 55epoch results: [train] iter_time=0.244, forward_time=0.152, loss_ctc=69.296, loss_att=51.744, acc=0.719, loss=57.010, backward_time=1.035, grad_norm=137.602, clip=100.000, loss_scale=2.636e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=3.371, time=4 hours, 41 minutes and 10.78 seconds, total_count=520000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.158, cer_ctc=0.245, loss_att=35.877, acc=0.700, cer=0.361, wer=0.989, loss=37.761, time=6 minutes and 6.77 seconds, total_count=53130, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 0.96 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-16 00:16:57,201 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-16 00:16:57,394 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/38epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/50epoch.pth
+[gpub001:0/64] 2023-07-16 00:16:57,395 (trainer:272) INFO: 56/60epoch started. Estimated time to finish: 1 day, 49 minutes and 46.32 seconds
+[gpub001:0/64] 2023-07-16 00:16:59,197 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-16 00:17:17,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-16 00:17:22,740 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-16 00:25:03,887 (trainer:732) INFO: 56epoch:train:1-100batch: iter_time=3.403, forward_time=0.191, loss_ctc=67.004, loss_att=52.989, acc=0.697, loss=57.193, backward_time=1.047, grad_norm=134.202, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=4.811e-05, train_time=9.711
+[gpub001:0/64] 2023-07-16 00:26:04,353 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-16 00:27:28,104 (trainer:732) INFO: 56epoch:train:101-200batch: iter_time=9.493e-05, forward_time=0.145, loss_ctc=68.220, loss_att=50.035, acc=0.712, loss=55.491, backward_time=1.051, grad_norm=141.648, clip=100.000, loss_scale=4.570e+32, optim_step_time=0.182, optim0_lr0=4.811e-05, train_time=2.884
+[gpub001:0/64] 2023-07-16 00:29:45,320 (trainer:732) INFO: 56epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.143, loss_ctc=82.452, loss_att=59.724, acc=0.705, loss=66.542, backward_time=1.029, grad_norm=146.750, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:32:02,788 (trainer:732) INFO: 56epoch:train:301-400batch: iter_time=9.678e-05, forward_time=0.144, loss_ctc=72.797, loss_att=51.224, acc=0.713, loss=57.695, backward_time=1.028, grad_norm=135.271, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.749
+[gpub001:0/64] 2023-07-16 00:34:19,996 (trainer:732) INFO: 56epoch:train:401-500batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=64.462, loss_att=47.713, acc=0.715, loss=52.738, backward_time=1.028, grad_norm=123.919, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:36:48,635 (trainer:732) INFO: 56epoch:train:501-600batch: iter_time=2.047e-04, forward_time=0.231, loss_ctc=68.197, loss_att=52.344, acc=0.718, loss=57.100, backward_time=1.043, grad_norm=143.676, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.972
+[gpub001:0/64] 2023-07-16 00:39:11,940 (trainer:732) INFO: 56epoch:train:601-700batch: iter_time=7.600e-04, forward_time=0.198, loss_ctc=77.810, loss_att=56.459, acc=0.705, loss=62.865, backward_time=1.034, grad_norm=136.066, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.866
+[gpub001:0/64] 2023-07-16 00:41:30,733 (trainer:732) INFO: 56epoch:train:701-800batch: iter_time=9.654e-05, forward_time=0.146, loss_ctc=68.615, loss_att=51.269, acc=0.712, loss=56.472, backward_time=1.030, grad_norm=132.539, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.808e-05, train_time=2.776
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2157595.0 ON gpub001 CANCELLED AT 2023-07-16T00:41:51 ***